{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "hello_char_rnn.ipynb",
      "version": "0.3.2",
      "provenance": [],
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python2",
      "display_name": "Python 2"
    },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "[View in Colaboratory](https://colab.research.google.com/github/utensil/julia-playground/blob/master/dl/hello_char_rnn.ipynb)"
      ]
    },
    {
      "metadata": {
        "id": "_P2RN-qFOqhz",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 118
        },
        "outputId": "cd6b5500-dd7f-4a25-97f2-7f70d76d29e2"
      },
      "cell_type": "code",
      "source": [
        "# memory footprint support libraries/code\n",
        "!ln -sf /opt/bin/nvidia-smi /usr/bin/nvidia-smi\n",
        "!pip install gputil\n",
        "!pip install psutil\n",
        "!pip install humanize\n",
        "import psutil\n",
        "import humanize\n",
        "import os\n",
        "import GPUtil as GPU\n",
        "GPUs = GPU.getGPUs()\n",
        "# XXX: only one GPU on Colab and it isn't guaranteed -- guard the empty case\n",
        "gpu = GPUs[0] if GPUs else None\n",
        "def printm():\n",
        "    \"\"\"Print free general RAM / this process's size, and GPU memory stats.\"\"\"\n",
        "    process = psutil.Process(os.getpid())\n",
        "    # Join with '+' only: a ',' inside print() under the Python 2 kernel\n",
        "    # would print a tuple repr instead of one line.\n",
        "    print(\"Gen RAM Free: \" + humanize.naturalsize(psutil.virtual_memory().available) + \" | Proc size: \" + humanize.naturalsize(process.memory_info().rss))\n",
        "    if gpu is not None:\n",
        "        print(\"GPU RAM Free: {0:.0f}MB | Used: {1:.0f}MB | Util {2:3.0f}% | Total {3:.0f}MB\".format(gpu.memoryFree, gpu.memoryUsed, gpu.memoryUtil*100, gpu.memoryTotal))\n",
        "    else:\n",
        "        print(\"No GPU detected\")\n",
        "printm()"
      ],
      "execution_count": 2,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Requirement already satisfied: gputil in /usr/local/lib/python2.7/dist-packages (1.3.0)\r\n",
            "Requirement already satisfied: numpy in /usr/local/lib/python2.7/dist-packages (from gputil) (1.14.5)\n",
            "Requirement already satisfied: psutil in /usr/local/lib/python2.7/dist-packages (5.4.6)\n",
            "Requirement already satisfied: humanize in /usr/local/lib/python2.7/dist-packages (0.5.1)\n",
            "('Gen RAM Free: 12.6 GB', ' | Proc size: 150.3 MB')\n",
            "GPU RAM Free: 11438MB | Used: 1MB | Util   0% | Total 11439MB\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "utxZQT8VNmRb",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 558
        },
        "outputId": "21260745-fe46-46a1-ef3d-a801efb33e25"
      },
      "cell_type": "code",
      "source": [
        "# Install the CUDA 8.0 runtime-compilation library and GPU-enabled MXNet.\n",
        "# -y keeps apt non-interactive so the cell cannot hang on a confirmation prompt.\n",
        "!apt install -y libnvrtc8.0\n",
        "!pip install mxnet-cu80\n",
        "import mxnet as mx"
      ],
      "execution_count": 4,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Reading package lists... Done\n",
            "Building dependency tree       \n",
            "Reading state information... Done\n",
            "The following NEW packages will be installed:\n",
            "  libnvrtc8.0\n",
            "0 upgraded, 1 newly installed, 0 to remove and 0 not upgraded.\n",
            "Need to get 6,225 kB of archives.\n",
            "After this operation, 28.3 MB of additional disk space will be used.\n",
            "Get:1 http://archive.ubuntu.com/ubuntu artful/multiverse amd64 libnvrtc8.0 amd64 8.0.61-1 [6,225 kB]\n",
            "Fetched 6,225 kB in 1s (5,443 kB/s)\n",
            "\n",
            "\u001b7\u001b[0;23r\u001b8\u001b[1ASelecting previously unselected package libnvrtc8.0:amd64.\n",
            "(Reading database ... 18396 files and directories currently installed.)\n",
            "Preparing to unpack .../libnvrtc8.0_8.0.61-1_amd64.deb ...\n",
            "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [  0%]\u001b[49m\u001b[39m [..........................................................] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 16%]\u001b[49m\u001b[39m [#########.................................................] \u001b8Unpacking libnvrtc8.0:amd64 (8.0.61-1) ...\n",
            "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 33%]\u001b[49m\u001b[39m [###################.......................................] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 50%]\u001b[49m\u001b[39m [#############################.............................] \u001b8Setting up libnvrtc8.0:amd64 (8.0.61-1) ...\n",
            "\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 66%]\u001b[49m\u001b[39m [######################################....................] \u001b8\u001b7\u001b[24;0f\u001b[42m\u001b[30mProgress: [ 83%]\u001b[49m\u001b[39m [################################################..........] \u001b8Processing triggers for libc-bin (2.26-0ubuntu2.1) ...\n",
            "\n",
            "\u001b7\u001b[0;24r\u001b8\u001b[1A\u001b[JCollecting mxnet-cu80\n",
            "\u001b[?25l  Downloading https://files.pythonhosted.org/packages/f6/6c/566a1d4b8b1005b7d9ccfaecd7632f6dca596246f6657827b2d4e97c72c7/mxnet_cu80-1.2.1-py2.py3-none-manylinux1_x86_64.whl (299.1MB)\n",
            "\u001b[K    23% |███████▌                        | 70.3MB 27.8MB/s eta 0:00:09"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "\u001b[K    100% |████████████████████████████████| 299.1MB 58kB/s \n",
            "\u001b[?25hCollecting graphviz<0.9.0,>=0.8.1 (from mxnet-cu80)\n",
            "  Downloading https://files.pythonhosted.org/packages/53/39/4ab213673844e0c004bed8a0781a0721a3f6bb23eb8854ee75c236428892/graphviz-0.8.4-py2.py3-none-any.whl\n",
            "Requirement already satisfied: numpy<1.15.0,>=1.8.2 in /usr/local/lib/python2.7/dist-packages (from mxnet-cu80) (1.14.5)\n",
            "Requirement already satisfied: requests<2.19.0,>=2.18.4 in /usr/local/lib/python2.7/dist-packages (from mxnet-cu80) (2.18.4)\n",
            "Requirement already satisfied: idna<2.7,>=2.5 in /usr/local/lib/python2.7/dist-packages (from requests<2.19.0,>=2.18.4->mxnet-cu80) (2.6)\n",
            "Requirement already satisfied: urllib3<1.23,>=1.21.1 in /usr/local/lib/python2.7/dist-packages (from requests<2.19.0,>=2.18.4->mxnet-cu80) (1.22)\n",
            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python2.7/dist-packages (from requests<2.19.0,>=2.18.4->mxnet-cu80) (2018.4.16)\n",
            "Requirement already satisfied: chardet<3.1.0,>=3.0.2 in /usr/local/lib/python2.7/dist-packages (from requests<2.19.0,>=2.18.4->mxnet-cu80) (3.0.4)\n",
            "Installing collected packages: graphviz, mxnet-cu80\n",
            "Successfully installed graphviz-0.8.4 mxnet-cu80-1.2.1\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "0uKquLbhQm3_",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 84
        },
        "outputId": "8ccfac55-4e1e-4e2d-a319-6a531abc0da9"
      },
      "cell_type": "code",
      "source": [
        "# Clone the Char-RNN repo; skip if it already exists so the cell\n",
        "# survives re-running (plain `git clone` fails on an existing directory).\n",
        "![ -d Char-RNN-TensorFlow ] || git clone https://github.com/hzy46/Char-RNN-TensorFlow.git"
      ],
      "execution_count": 8,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "Cloning into 'Char-RNN-TensorFlow'...\n",
            "remote: Counting objects: 26, done.\u001b[K\n",
            "remote: Total 26 (delta 0), reused 0 (delta 0), pack-reused 26\u001b[K\n",
            "Unpacking objects: 100% (26/26), done.\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "YuHP13HeST6z",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 17173
        },
        "outputId": "b4fe1e6c-49e9-4b4b-cf6a-0192a8003fcb"
      },
      "cell_type": "code",
      "source": [
        "# Train the character-level RNN on data/poetry.txt under the name 'poetry':\n",
        "# 10000 max steps, batches of 32 sequences x 26 steps, learning rate 0.005,\n",
        "# with --use_embedding enabled (presumably needed for the multi-byte\n",
        "# Chinese corpus -- confirm against the repo's README).\n",
        "!cd Char-RNN-TensorFlow && python train.py --use_embedding --input_file data/poetry.txt --name poetry --learning_rate 0.005 --num_steps 26 --num_seqs 32 --max_steps 10000"
      ],
      "execution_count": 9,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "5387\n",
            "3501\n",
            "WARNING:tensorflow:From /content/Char-RNN-TensorFlow/model.py:93: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "\n",
            "Future major versions of TensorFlow will allow gradients to flow\n",
            "into the labels input on backprop by default.\n",
            "\n",
            "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\n",
            "\n",
            "2018-07-28 14:28:23.339235: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2018-07-28 14:28:23.339827: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \n",
            "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\n",
            "pciBusID: 0000:00:04.0\n",
            "totalMemory: 11.17GiB freeMemory: 11.10GiB\n",
            "2018-07-28 14:28:23.339881: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\n",
            "2018-07-28 14:28:23.800867: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2018-07-28 14:28:23.800958: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958]      0 \n",
            "2018-07-28 14:28:23.800987: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0:   N \n",
            "2018-07-28 14:28:23.801406: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10763 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)\n",
            "step: 10/10000...  loss: 6.5503...  0.0976 sec/batch\n",
            "step: 20/10000...  loss: 6.5084...  0.0995 sec/batch\n",
            "step: 30/10000...  loss: 6.3076...  0.0980 sec/batch\n",
            "step: 40/10000...  loss: 6.2250...  0.1381 sec/batch\n",
            "step: 50/10000...  loss: 6.0408...  0.0891 sec/batch\n",
            "step: 60/10000...  loss: 5.8749...  0.0840 sec/batch\n",
            "step: 70/10000...  loss: 5.7850...  0.0878 sec/batch\n",
            "step: 80/10000...  loss: 5.7318...  0.0983 sec/batch\n",
            "step: 90/10000...  loss: 5.6727...  0.0870 sec/batch\n",
            "step: 100/10000...  loss: 5.5724...  0.0877 sec/batch\n",
            "step: 110/10000...  loss: 5.5872...  0.0835 sec/batch\n",
            "step: 120/10000...  loss: 5.4830...  0.0966 sec/batch\n",
            "step: 130/10000...  loss: 5.4636...  0.0872 sec/batch\n",
            "step: 140/10000...  loss: 5.4168...  0.0886 sec/batch\n",
            "step: 150/10000...  loss: 5.4805...  0.1149 sec/batch\n",
            "step: 160/10000...  loss: 5.4889...  0.0846 sec/batch\n",
            "step: 170/10000...  loss: 5.4045...  0.0937 sec/batch\n",
            "step: 180/10000...  loss: 5.4422...  0.0905 sec/batch\n",
            "step: 190/10000...  loss: 5.4532...  0.1111 sec/batch\n",
            "step: 200/10000...  loss: 5.3409...  0.0842 sec/batch\n",
            "step: 210/10000...  loss: 5.3405...  0.0884 sec/batch\n",
            "step: 220/10000...  loss: 5.4533...  0.0874 sec/batch\n",
            "step: 230/10000...  loss: 5.3560...  0.0832 sec/batch\n",
            "step: 240/10000...  loss: 5.3247...  0.0952 sec/batch\n",
            "step: 250/10000...  loss: 5.3851...  0.0936 sec/batch\n",
            "step: 260/10000...  loss: 5.4183...  0.0867 sec/batch\n",
            "step: 270/10000...  loss: 5.2965...  0.0878 sec/batch\n",
            "step: 280/10000...  loss: 5.3577...  0.0836 sec/batch\n",
            "step: 290/10000...  loss: 5.3775...  0.0930 sec/batch\n",
            "step: 300/10000...  loss: 5.3400...  0.1183 sec/batch\n",
            "step: 310/10000...  loss: 5.3416...  0.0849 sec/batch\n",
            "step: 320/10000...  loss: 5.3459...  0.0811 sec/batch\n",
            "step: 330/10000...  loss: 5.3807...  0.1006 sec/batch\n",
            "step: 340/10000...  loss: 5.4062...  0.0838 sec/batch\n",
            "step: 350/10000...  loss: 5.3606...  0.0880 sec/batch\n",
            "step: 360/10000...  loss: 5.3277...  0.0944 sec/batch\n",
            "step: 370/10000...  loss: 5.3535...  0.0907 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 380/10000...  loss: 5.2762...  0.0889 sec/batch\n",
            "step: 390/10000...  loss: 5.4210...  0.0928 sec/batch\n",
            "step: 400/10000...  loss: 5.3872...  0.0838 sec/batch\n",
            "step: 410/10000...  loss: 5.3875...  0.1104 sec/batch\n",
            "step: 420/10000...  loss: 5.3337...  0.0878 sec/batch\n",
            "step: 430/10000...  loss: 5.3171...  0.0893 sec/batch\n",
            "step: 440/10000...  loss: 5.1649...  0.0990 sec/batch\n",
            "step: 450/10000...  loss: 5.3437...  0.1052 sec/batch\n",
            "step: 460/10000...  loss: 5.3547...  0.0838 sec/batch\n",
            "step: 470/10000...  loss: 5.2432...  0.0885 sec/batch\n",
            "step: 480/10000...  loss: 5.3419...  0.0849 sec/batch\n",
            "step: 490/10000...  loss: 5.2243...  0.0871 sec/batch\n",
            "step: 500/10000...  loss: 5.2685...  0.0850 sec/batch\n",
            "step: 510/10000...  loss: 5.3271...  0.0918 sec/batch\n",
            "step: 520/10000...  loss: 5.3210...  0.0888 sec/batch\n",
            "step: 530/10000...  loss: 5.2131...  0.0957 sec/batch\n",
            "step: 540/10000...  loss: 5.2771...  0.1139 sec/batch\n",
            "step: 550/10000...  loss: 5.3214...  0.0960 sec/batch\n",
            "step: 560/10000...  loss: 5.2217...  0.1049 sec/batch\n",
            "step: 570/10000...  loss: 5.3000...  0.0862 sec/batch\n",
            "step: 580/10000...  loss: 5.2852...  0.0858 sec/batch\n",
            "step: 590/10000...  loss: 5.2651...  0.0832 sec/batch\n",
            "step: 600/10000...  loss: 5.3222...  0.1118 sec/batch\n",
            "step: 610/10000...  loss: 5.1769...  0.0898 sec/batch\n",
            "step: 620/10000...  loss: 5.1711...  0.0859 sec/batch\n",
            "step: 630/10000...  loss: 5.1479...  0.0899 sec/batch\n",
            "step: 640/10000...  loss: 5.2436...  0.0904 sec/batch\n",
            "step: 650/10000...  loss: 5.1731...  0.0952 sec/batch\n",
            "step: 660/10000...  loss: 5.2951...  0.0946 sec/batch\n",
            "step: 670/10000...  loss: 5.3889...  0.0833 sec/batch\n",
            "step: 680/10000...  loss: 5.2643...  0.0886 sec/batch\n",
            "step: 690/10000...  loss: 5.3085...  0.0862 sec/batch\n",
            "step: 700/10000...  loss: 5.2041...  0.0856 sec/batch\n",
            "step: 710/10000...  loss: 5.1424...  0.0882 sec/batch\n",
            "step: 720/10000...  loss: 5.2828...  0.0935 sec/batch\n",
            "step: 730/10000...  loss: 5.1889...  0.0878 sec/batch\n",
            "step: 740/10000...  loss: 5.1116...  0.0879 sec/batch\n",
            "step: 750/10000...  loss: 5.1667...  0.0860 sec/batch\n",
            "step: 760/10000...  loss: 5.1459...  0.0914 sec/batch\n",
            "step: 770/10000...  loss: 5.1754...  0.0906 sec/batch\n",
            "step: 780/10000...  loss: 5.2236...  0.0918 sec/batch\n",
            "step: 790/10000...  loss: 5.1612...  0.1019 sec/batch\n",
            "step: 800/10000...  loss: 5.2516...  0.0903 sec/batch\n",
            "step: 810/10000...  loss: 5.0950...  0.0884 sec/batch\n",
            "step: 820/10000...  loss: 5.2079...  0.0950 sec/batch\n",
            "step: 830/10000...  loss: 5.1323...  0.0876 sec/batch\n",
            "step: 840/10000...  loss: 5.1534...  0.0880 sec/batch\n",
            "step: 850/10000...  loss: 5.0890...  0.0973 sec/batch\n",
            "step: 860/10000...  loss: 5.2005...  0.0959 sec/batch\n",
            "step: 870/10000...  loss: 5.1709...  0.0931 sec/batch\n",
            "step: 880/10000...  loss: 5.2006...  0.0898 sec/batch\n",
            "step: 890/10000...  loss: 5.1437...  0.0962 sec/batch\n",
            "step: 900/10000...  loss: 5.1160...  0.0852 sec/batch\n",
            "step: 910/10000...  loss: 5.0154...  0.0945 sec/batch\n",
            "step: 920/10000...  loss: 5.0621...  0.1522 sec/batch\n",
            "step: 930/10000...  loss: 5.1586...  0.1001 sec/batch\n",
            "step: 940/10000...  loss: 5.2192...  0.0860 sec/batch\n",
            "step: 950/10000...  loss: 5.1639...  0.0880 sec/batch\n",
            "step: 960/10000...  loss: 5.0838...  0.0962 sec/batch\n",
            "step: 970/10000...  loss: 5.0600...  0.0949 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 980/10000...  loss: 5.1152...  0.0951 sec/batch\n",
            "step: 990/10000...  loss: 5.0260...  0.1078 sec/batch\n",
            "step: 1000/10000...  loss: 5.0995...  0.0903 sec/batch\n",
            "step: 1010/10000...  loss: 5.0713...  0.0963 sec/batch\n",
            "step: 1020/10000...  loss: 5.0541...  0.1032 sec/batch\n",
            "step: 1030/10000...  loss: 5.1176...  0.0915 sec/batch\n",
            "step: 1040/10000...  loss: 5.0614...  0.0972 sec/batch\n",
            "step: 1050/10000...  loss: 4.9866...  0.0953 sec/batch\n",
            "step: 1060/10000...  loss: 5.0217...  0.0946 sec/batch\n",
            "step: 1070/10000...  loss: 4.9625...  0.0909 sec/batch\n",
            "step: 1080/10000...  loss: 5.0197...  0.0886 sec/batch\n",
            "step: 1090/10000...  loss: 5.0521...  0.0899 sec/batch\n",
            "step: 1100/10000...  loss: 5.0207...  0.0934 sec/batch\n",
            "step: 1110/10000...  loss: 5.0009...  0.0979 sec/batch\n",
            "step: 1120/10000...  loss: 5.1143...  0.0881 sec/batch\n",
            "step: 1130/10000...  loss: 5.0718...  0.1047 sec/batch\n",
            "step: 1140/10000...  loss: 5.0742...  0.0949 sec/batch\n",
            "step: 1150/10000...  loss: 5.0110...  0.1082 sec/batch\n",
            "step: 1160/10000...  loss: 5.0153...  0.0920 sec/batch\n",
            "step: 1170/10000...  loss: 5.1940...  0.0940 sec/batch\n",
            "step: 1180/10000...  loss: 5.0073...  0.0854 sec/batch\n",
            "step: 1190/10000...  loss: 5.1320...  0.1123 sec/batch\n",
            "step: 1200/10000...  loss: 5.1609...  0.0894 sec/batch\n",
            "step: 1210/10000...  loss: 4.9907...  0.0939 sec/batch\n",
            "step: 1220/10000...  loss: 4.9070...  0.0868 sec/batch\n",
            "step: 1230/10000...  loss: 5.2041...  0.1090 sec/batch\n",
            "step: 1240/10000...  loss: 5.1402...  0.0878 sec/batch\n",
            "step: 1250/10000...  loss: 5.0746...  0.0865 sec/batch\n",
            "step: 1260/10000...  loss: 5.0652...  0.0902 sec/batch\n",
            "step: 1270/10000...  loss: 5.0868...  0.1120 sec/batch\n",
            "step: 1280/10000...  loss: 5.0157...  0.0906 sec/batch\n",
            "step: 1290/10000...  loss: 5.1207...  0.1009 sec/batch\n",
            "step: 1300/10000...  loss: 5.0094...  0.0916 sec/batch\n",
            "step: 1310/10000...  loss: 4.9853...  0.1126 sec/batch\n",
            "step: 1320/10000...  loss: 5.1029...  0.0966 sec/batch\n",
            "step: 1330/10000...  loss: 5.1057...  0.0934 sec/batch\n",
            "step: 1340/10000...  loss: 4.9870...  0.0854 sec/batch\n",
            "step: 1350/10000...  loss: 4.9766...  0.1114 sec/batch\n",
            "step: 1360/10000...  loss: 4.9696...  0.0882 sec/batch\n",
            "step: 1370/10000...  loss: 4.8621...  0.0877 sec/batch\n",
            "step: 1380/10000...  loss: 5.0154...  0.1017 sec/batch\n",
            "step: 1390/10000...  loss: 4.8453...  0.1022 sec/batch\n",
            "step: 1400/10000...  loss: 4.8987...  0.0897 sec/batch\n",
            "step: 1410/10000...  loss: 5.0550...  0.0949 sec/batch\n",
            "step: 1420/10000...  loss: 5.0368...  0.0933 sec/batch\n",
            "step: 1430/10000...  loss: 5.0870...  0.0867 sec/batch\n",
            "step: 1440/10000...  loss: 4.9731...  0.0971 sec/batch\n",
            "step: 1450/10000...  loss: 4.9920...  0.0946 sec/batch\n",
            "step: 1460/10000...  loss: 5.0103...  0.0936 sec/batch\n",
            "step: 1470/10000...  loss: 4.9787...  0.1224 sec/batch\n",
            "step: 1480/10000...  loss: 4.9669...  0.0875 sec/batch\n",
            "step: 1490/10000...  loss: 4.9396...  0.0939 sec/batch\n",
            "step: 1500/10000...  loss: 4.8835...  0.0867 sec/batch\n",
            "step: 1510/10000...  loss: 5.0471...  0.1005 sec/batch\n",
            "step: 1520/10000...  loss: 4.9288...  0.0904 sec/batch\n",
            "step: 1530/10000...  loss: 4.9994...  0.0886 sec/batch\n",
            "step: 1540/10000...  loss: 4.9793...  0.0917 sec/batch\n",
            "step: 1550/10000...  loss: 4.9663...  0.1047 sec/batch\n",
            "step: 1560/10000...  loss: 4.9698...  0.0976 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 1570/10000...  loss: 4.9367...  0.0986 sec/batch\n",
            "step: 1580/10000...  loss: 4.9327...  0.0937 sec/batch\n",
            "step: 1590/10000...  loss: 4.8923...  0.0935 sec/batch\n",
            "step: 1600/10000...  loss: 5.0011...  0.0924 sec/batch\n",
            "step: 1610/10000...  loss: 5.0179...  0.0882 sec/batch\n",
            "step: 1620/10000...  loss: 4.9311...  0.0936 sec/batch\n",
            "step: 1630/10000...  loss: 4.9134...  0.0863 sec/batch\n",
            "step: 1640/10000...  loss: 4.9615...  0.0829 sec/batch\n",
            "step: 1650/10000...  loss: 4.9001...  0.0820 sec/batch\n",
            "step: 1660/10000...  loss: 4.9989...  0.0915 sec/batch\n",
            "step: 1670/10000...  loss: 4.9454...  0.0857 sec/batch\n",
            "step: 1680/10000...  loss: 4.8506...  0.0862 sec/batch\n",
            "step: 1690/10000...  loss: 4.8397...  0.0878 sec/batch\n",
            "step: 1700/10000...  loss: 4.8330...  0.1092 sec/batch\n",
            "step: 1710/10000...  loss: 5.0012...  0.0870 sec/batch\n",
            "step: 1720/10000...  loss: 4.9794...  0.0840 sec/batch\n",
            "step: 1730/10000...  loss: 4.8621...  0.0929 sec/batch\n",
            "step: 1740/10000...  loss: 4.8816...  0.1013 sec/batch\n",
            "step: 1750/10000...  loss: 4.7770...  0.0971 sec/batch\n",
            "step: 1760/10000...  loss: 4.8436...  0.0896 sec/batch\n",
            "step: 1770/10000...  loss: 4.8621...  0.0983 sec/batch\n",
            "step: 1780/10000...  loss: 4.9439...  0.0925 sec/batch\n",
            "step: 1790/10000...  loss: 4.9065...  0.0939 sec/batch\n",
            "step: 1800/10000...  loss: 5.0004...  0.0924 sec/batch\n",
            "step: 1810/10000...  loss: 4.8106...  0.0826 sec/batch\n",
            "step: 1820/10000...  loss: 4.8281...  0.1210 sec/batch\n",
            "step: 1830/10000...  loss: 4.9824...  0.0922 sec/batch\n",
            "step: 1840/10000...  loss: 4.8363...  0.0939 sec/batch\n",
            "step: 1850/10000...  loss: 4.8541...  0.0881 sec/batch\n",
            "step: 1860/10000...  loss: 4.9124...  0.1033 sec/batch\n",
            "step: 1870/10000...  loss: 4.9806...  0.0903 sec/batch\n",
            "step: 1880/10000...  loss: 4.9092...  0.0877 sec/batch\n",
            "step: 1890/10000...  loss: 4.9126...  0.0880 sec/batch\n",
            "step: 1900/10000...  loss: 4.8481...  0.1003 sec/batch\n",
            "step: 1910/10000...  loss: 4.9412...  0.0908 sec/batch\n",
            "step: 1920/10000...  loss: 4.9901...  0.0890 sec/batch\n",
            "step: 1930/10000...  loss: 4.8934...  0.0968 sec/batch\n",
            "step: 1940/10000...  loss: 4.9002...  0.1117 sec/batch\n",
            "step: 1950/10000...  loss: 4.7907...  0.0878 sec/batch\n",
            "step: 1960/10000...  loss: 4.8661...  0.0909 sec/batch\n",
            "step: 1970/10000...  loss: 4.6941...  0.0918 sec/batch\n",
            "step: 1980/10000...  loss: 4.8521...  0.1306 sec/batch\n",
            "step: 1990/10000...  loss: 4.7692...  0.0950 sec/batch\n",
            "step: 2000/10000...  loss: 4.8366...  0.0970 sec/batch\n",
            "step: 2010/10000...  loss: 4.8439...  0.0889 sec/batch\n",
            "step: 2020/10000...  loss: 4.8434...  0.0955 sec/batch\n",
            "step: 2030/10000...  loss: 4.8258...  0.0959 sec/batch\n",
            "step: 2040/10000...  loss: 4.8247...  0.0896 sec/batch\n",
            "step: 2050/10000...  loss: 4.8893...  0.0884 sec/batch\n",
            "step: 2060/10000...  loss: 4.8378...  0.0901 sec/batch\n",
            "step: 2070/10000...  loss: 4.8493...  0.0883 sec/batch\n",
            "step: 2080/10000...  loss: 4.8377...  0.0925 sec/batch\n",
            "step: 2090/10000...  loss: 4.8977...  0.0903 sec/batch\n",
            "step: 2100/10000...  loss: 4.7552...  0.0909 sec/batch\n",
            "step: 2110/10000...  loss: 4.7722...  0.0910 sec/batch\n",
            "step: 2120/10000...  loss: 4.7337...  0.0953 sec/batch\n",
            "step: 2130/10000...  loss: 4.9939...  0.0872 sec/batch\n",
            "step: 2140/10000...  loss: 4.9350...  0.0880 sec/batch\n",
            "step: 2150/10000...  loss: 4.9199...  0.0886 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 2160/10000...  loss: 4.7667...  0.0947 sec/batch\n",
            "step: 2170/10000...  loss: 4.6352...  0.0955 sec/batch\n",
            "step: 2180/10000...  loss: 4.8275...  0.1007 sec/batch\n",
            "step: 2190/10000...  loss: 4.8438...  0.0906 sec/batch\n",
            "step: 2200/10000...  loss: 4.7311...  0.1000 sec/batch\n",
            "step: 2210/10000...  loss: 4.9158...  0.0966 sec/batch\n",
            "step: 2220/10000...  loss: 4.8483...  0.0963 sec/batch\n",
            "step: 2230/10000...  loss: 4.8412...  0.0984 sec/batch\n",
            "step: 2240/10000...  loss: 4.8208...  0.0947 sec/batch\n",
            "step: 2250/10000...  loss: 4.6997...  0.0870 sec/batch\n",
            "step: 2260/10000...  loss: 4.8333...  0.0911 sec/batch\n",
            "step: 2270/10000...  loss: 4.8537...  0.0931 sec/batch\n",
            "step: 2280/10000...  loss: 4.7533...  0.0927 sec/batch\n",
            "step: 2290/10000...  loss: 4.9129...  0.0897 sec/batch\n",
            "step: 2300/10000...  loss: 4.7394...  0.0892 sec/batch\n",
            "step: 2310/10000...  loss: 4.7632...  0.0933 sec/batch\n",
            "step: 2320/10000...  loss: 4.8065...  0.0889 sec/batch\n",
            "step: 2330/10000...  loss: 4.8986...  0.0872 sec/batch\n",
            "step: 2340/10000...  loss: 4.8668...  0.0902 sec/batch\n",
            "step: 2350/10000...  loss: 4.8716...  0.0896 sec/batch\n",
            "step: 2360/10000...  loss: 4.8986...  0.0862 sec/batch\n",
            "step: 2370/10000...  loss: 4.9208...  0.0931 sec/batch\n",
            "step: 2380/10000...  loss: 4.8216...  0.0894 sec/batch\n",
            "step: 2390/10000...  loss: 4.9293...  0.0955 sec/batch\n",
            "step: 2400/10000...  loss: 4.9451...  0.0919 sec/batch\n",
            "step: 2410/10000...  loss: 4.8225...  0.0896 sec/batch\n",
            "step: 2420/10000...  loss: 4.8902...  0.0937 sec/batch\n",
            "step: 2430/10000...  loss: 4.8697...  0.0900 sec/batch\n",
            "step: 2440/10000...  loss: 4.9410...  0.0879 sec/batch\n",
            "step: 2450/10000...  loss: 4.9345...  0.0909 sec/batch\n",
            "step: 2460/10000...  loss: 4.8126...  0.0892 sec/batch\n",
            "step: 2470/10000...  loss: 4.8382...  0.0920 sec/batch\n",
            "step: 2480/10000...  loss: 4.9153...  0.0899 sec/batch\n",
            "step: 2490/10000...  loss: 4.8122...  0.0905 sec/batch\n",
            "step: 2500/10000...  loss: 4.8143...  0.0892 sec/batch\n",
            "step: 2510/10000...  loss: 4.9267...  0.0870 sec/batch\n",
            "step: 2520/10000...  loss: 4.8101...  0.0874 sec/batch\n",
            "step: 2530/10000...  loss: 4.6365...  0.0923 sec/batch\n",
            "step: 2540/10000...  loss: 4.7815...  0.0868 sec/batch\n",
            "step: 2550/10000...  loss: 4.8606...  0.0981 sec/batch\n",
            "step: 2560/10000...  loss: 4.8211...  0.0961 sec/batch\n",
            "step: 2570/10000...  loss: 4.7558...  0.0869 sec/batch\n",
            "step: 2580/10000...  loss: 4.8584...  0.0891 sec/batch\n",
            "step: 2590/10000...  loss: 4.8609...  0.1121 sec/batch\n",
            "step: 2600/10000...  loss: 4.7052...  0.0850 sec/batch\n",
            "step: 2610/10000...  loss: 4.7992...  0.0893 sec/batch\n",
            "step: 2620/10000...  loss: 4.9708...  0.0865 sec/batch\n",
            "step: 2630/10000...  loss: 4.7953...  0.1047 sec/batch\n",
            "step: 2640/10000...  loss: 4.8165...  0.0982 sec/batch\n",
            "step: 2650/10000...  loss: 4.8085...  0.0899 sec/batch\n",
            "step: 2660/10000...  loss: 4.9473...  0.0962 sec/batch\n",
            "step: 2670/10000...  loss: 4.8989...  0.1047 sec/batch\n",
            "step: 2680/10000...  loss: 4.9751...  0.0881 sec/batch\n",
            "step: 2690/10000...  loss: 4.9838...  0.0922 sec/batch\n",
            "step: 2700/10000...  loss: 4.8516...  0.0891 sec/batch\n",
            "step: 2710/10000...  loss: 4.8541...  0.0862 sec/batch\n",
            "step: 2720/10000...  loss: 4.8851...  0.0940 sec/batch\n",
            "step: 2730/10000...  loss: 4.9025...  0.0918 sec/batch\n",
            "step: 2740/10000...  loss: 4.8846...  0.0894 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 2750/10000...  loss: 4.7963...  0.0895 sec/batch\n",
            "step: 2760/10000...  loss: 4.8824...  0.0933 sec/batch\n",
            "step: 2770/10000...  loss: 4.8344...  0.0887 sec/batch\n",
            "step: 2780/10000...  loss: 4.8920...  0.0934 sec/batch\n",
            "step: 2790/10000...  loss: 4.9805...  0.0964 sec/batch\n",
            "step: 2800/10000...  loss: 4.7256...  0.0853 sec/batch\n",
            "step: 2810/10000...  loss: 4.8118...  0.0916 sec/batch\n",
            "step: 2820/10000...  loss: 4.7538...  0.0919 sec/batch\n",
            "step: 2830/10000...  loss: 4.8101...  0.1163 sec/batch\n",
            "step: 2840/10000...  loss: 4.8542...  0.0883 sec/batch\n",
            "step: 2850/10000...  loss: 4.7256...  0.0880 sec/batch\n",
            "step: 2860/10000...  loss: 4.8227...  0.0861 sec/batch\n",
            "step: 2870/10000...  loss: 4.7680...  0.0900 sec/batch\n",
            "step: 2880/10000...  loss: 4.7295...  0.0856 sec/batch\n",
            "step: 2890/10000...  loss: 4.8291...  0.0991 sec/batch\n",
            "step: 2900/10000...  loss: 4.8385...  0.0933 sec/batch\n",
            "step: 2910/10000...  loss: 4.7957...  0.0909 sec/batch\n",
            "step: 2920/10000...  loss: 4.7721...  0.1092 sec/batch\n",
            "step: 2930/10000...  loss: 4.6331...  0.1041 sec/batch\n",
            "step: 2940/10000...  loss: 4.8727...  0.1032 sec/batch\n",
            "step: 2950/10000...  loss: 4.7283...  0.0943 sec/batch\n",
            "step: 2960/10000...  loss: 4.8422...  0.1089 sec/batch\n",
            "step: 2970/10000...  loss: 4.7778...  0.0866 sec/batch\n",
            "step: 2980/10000...  loss: 4.7870...  0.0948 sec/batch\n",
            "step: 2990/10000...  loss: 4.6794...  0.0927 sec/batch\n",
            "step: 3000/10000...  loss: 4.7577...  0.1004 sec/batch\n",
            "step: 3010/10000...  loss: 4.6356...  0.1068 sec/batch\n",
            "step: 3020/10000...  loss: 4.7878...  0.0859 sec/batch\n",
            "step: 3030/10000...  loss: 4.7142...  0.0990 sec/batch\n",
            "step: 3040/10000...  loss: 4.8075...  0.0917 sec/batch\n",
            "step: 3050/10000...  loss: 4.7504...  0.0907 sec/batch\n",
            "step: 3060/10000...  loss: 4.8520...  0.0856 sec/batch\n",
            "step: 3070/10000...  loss: 4.8071...  0.0965 sec/batch\n",
            "step: 3080/10000...  loss: 4.7467...  0.0961 sec/batch\n",
            "step: 3090/10000...  loss: 4.7757...  0.0900 sec/batch\n",
            "step: 3100/10000...  loss: 4.7617...  0.0969 sec/batch\n",
            "step: 3110/10000...  loss: 4.7982...  0.0936 sec/batch\n",
            "step: 3120/10000...  loss: 4.7856...  0.0952 sec/batch\n",
            "step: 3130/10000...  loss: 4.8547...  0.0926 sec/batch\n",
            "step: 3140/10000...  loss: 4.8235...  0.0892 sec/batch\n",
            "step: 3150/10000...  loss: 4.6735...  0.0882 sec/batch\n",
            "step: 3160/10000...  loss: 4.6402...  0.0922 sec/batch\n",
            "step: 3170/10000...  loss: 4.7454...  0.0880 sec/batch\n",
            "step: 3180/10000...  loss: 4.7067...  0.0863 sec/batch\n",
            "step: 3190/10000...  loss: 4.7166...  0.0894 sec/batch\n",
            "step: 3200/10000...  loss: 4.7413...  0.0931 sec/batch\n",
            "step: 3210/10000...  loss: 4.7489...  0.0884 sec/batch\n",
            "step: 3220/10000...  loss: 4.7947...  0.0915 sec/batch\n",
            "step: 3230/10000...  loss: 4.8397...  0.0901 sec/batch\n",
            "step: 3240/10000...  loss: 4.6653...  0.0940 sec/batch\n",
            "step: 3250/10000...  loss: 4.7558...  0.0953 sec/batch\n",
            "step: 3260/10000...  loss: 4.8765...  0.0886 sec/batch\n",
            "step: 3270/10000...  loss: 4.7738...  0.0874 sec/batch\n",
            "step: 3280/10000...  loss: 4.6573...  0.0926 sec/batch\n",
            "step: 3290/10000...  loss: 4.7940...  0.0843 sec/batch\n",
            "step: 3300/10000...  loss: 4.8221...  0.0903 sec/batch\n",
            "step: 3310/10000...  loss: 4.6640...  0.0876 sec/batch\n",
            "step: 3320/10000...  loss: 4.6279...  0.0915 sec/batch\n",
            "step: 3330/10000...  loss: 4.6867...  0.0882 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 3340/10000...  loss: 4.7440...  0.0866 sec/batch\n",
            "step: 3350/10000...  loss: 4.6534...  0.0866 sec/batch\n",
            "step: 3360/10000...  loss: 4.8727...  0.0930 sec/batch\n",
            "step: 3370/10000...  loss: 4.7668...  0.0857 sec/batch\n",
            "step: 3380/10000...  loss: 4.6585...  0.0996 sec/batch\n",
            "step: 3390/10000...  loss: 4.6583...  0.0899 sec/batch\n",
            "step: 3400/10000...  loss: 4.8205...  0.0837 sec/batch\n",
            "step: 3410/10000...  loss: 4.7868...  0.0918 sec/batch\n",
            "step: 3420/10000...  loss: 4.7547...  0.0827 sec/batch\n",
            "step: 3430/10000...  loss: 4.6834...  0.0840 sec/batch\n",
            "step: 3440/10000...  loss: 4.7170...  0.0863 sec/batch\n",
            "step: 3450/10000...  loss: 4.7669...  0.0870 sec/batch\n",
            "step: 3460/10000...  loss: 4.7696...  0.0895 sec/batch\n",
            "step: 3470/10000...  loss: 4.7673...  0.0923 sec/batch\n",
            "step: 3480/10000...  loss: 4.6205...  0.0888 sec/batch\n",
            "step: 3490/10000...  loss: 4.8415...  0.0846 sec/batch\n",
            "step: 3500/10000...  loss: 4.8639...  0.0921 sec/batch\n",
            "step: 3510/10000...  loss: 4.8622...  0.0915 sec/batch\n",
            "step: 3520/10000...  loss: 4.7772...  0.0918 sec/batch\n",
            "step: 3530/10000...  loss: 4.7766...  0.0926 sec/batch\n",
            "step: 3540/10000...  loss: 4.7278...  0.0966 sec/batch\n",
            "step: 3550/10000...  loss: 4.9317...  0.0867 sec/batch\n",
            "step: 3560/10000...  loss: 4.8605...  0.0962 sec/batch\n",
            "step: 3570/10000...  loss: 4.8021...  0.0901 sec/batch\n",
            "step: 3580/10000...  loss: 4.7813...  0.0885 sec/batch\n",
            "step: 3590/10000...  loss: 4.7355...  0.0912 sec/batch\n",
            "step: 3600/10000...  loss: 4.7202...  0.0926 sec/batch\n",
            "step: 3610/10000...  loss: 4.6979...  0.0882 sec/batch\n",
            "step: 3620/10000...  loss: 4.6886...  0.0915 sec/batch\n",
            "step: 3630/10000...  loss: 4.7133...  0.0912 sec/batch\n",
            "step: 3640/10000...  loss: 4.7579...  0.0918 sec/batch\n",
            "step: 3650/10000...  loss: 4.7951...  0.0886 sec/batch\n",
            "step: 3660/10000...  loss: 4.7677...  0.0860 sec/batch\n",
            "step: 3670/10000...  loss: 4.7415...  0.0999 sec/batch\n",
            "step: 3680/10000...  loss: 4.8299...  0.0882 sec/batch\n",
            "step: 3690/10000...  loss: 4.7391...  0.0896 sec/batch\n",
            "step: 3700/10000...  loss: 4.7723...  0.0877 sec/batch\n",
            "step: 3710/10000...  loss: 4.8157...  0.0871 sec/batch\n",
            "step: 3720/10000...  loss: 4.8106...  0.0878 sec/batch\n",
            "step: 3730/10000...  loss: 4.7024...  0.0941 sec/batch\n",
            "step: 3740/10000...  loss: 4.6619...  0.0886 sec/batch\n",
            "step: 3750/10000...  loss: 4.7362...  0.0902 sec/batch\n",
            "step: 3760/10000...  loss: 4.6879...  0.0899 sec/batch\n",
            "step: 3770/10000...  loss: 4.7354...  0.0875 sec/batch\n",
            "step: 3780/10000...  loss: 4.8969...  0.0887 sec/batch\n",
            "step: 3790/10000...  loss: 4.8294...  0.1003 sec/batch\n",
            "step: 3800/10000...  loss: 4.6909...  0.0890 sec/batch\n",
            "step: 3810/10000...  loss: 4.9102...  0.0973 sec/batch\n",
            "step: 3820/10000...  loss: 4.7756...  0.1031 sec/batch\n",
            "step: 3830/10000...  loss: 4.6378...  0.0899 sec/batch\n",
            "step: 3840/10000...  loss: 4.7842...  0.0960 sec/batch\n",
            "step: 3850/10000...  loss: 4.7560...  0.0899 sec/batch\n",
            "step: 3860/10000...  loss: 4.7347...  0.0981 sec/batch\n",
            "step: 3870/10000...  loss: 4.8285...  0.0895 sec/batch\n",
            "step: 3880/10000...  loss: 4.7607...  0.0920 sec/batch\n",
            "step: 3890/10000...  loss: 4.7989...  0.0987 sec/batch\n",
            "step: 3900/10000...  loss: 4.8369...  0.0894 sec/batch\n",
            "step: 3910/10000...  loss: 4.7346...  0.0999 sec/batch\n",
            "step: 3920/10000...  loss: 4.8247...  0.0918 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 3930/10000...  loss: 4.7967...  0.0899 sec/batch\n",
            "step: 3940/10000...  loss: 4.7688...  0.1027 sec/batch\n",
            "step: 3950/10000...  loss: 4.7996...  0.1058 sec/batch\n",
            "step: 3960/10000...  loss: 4.7448...  0.1031 sec/batch\n",
            "step: 3970/10000...  loss: 4.6292...  0.0876 sec/batch\n",
            "step: 3980/10000...  loss: 4.7492...  0.0932 sec/batch\n",
            "step: 3990/10000...  loss: 4.7180...  0.1228 sec/batch\n",
            "step: 4000/10000...  loss: 4.7951...  0.0977 sec/batch\n",
            "step: 4010/10000...  loss: 4.6747...  0.0913 sec/batch\n",
            "step: 4020/10000...  loss: 4.5647...  0.1014 sec/batch\n",
            "step: 4030/10000...  loss: 4.7488...  0.0872 sec/batch\n",
            "step: 4040/10000...  loss: 4.8563...  0.0871 sec/batch\n",
            "step: 4050/10000...  loss: 4.6390...  0.0861 sec/batch\n",
            "step: 4060/10000...  loss: 4.6330...  0.0884 sec/batch\n",
            "step: 4070/10000...  loss: 4.8057...  0.0967 sec/batch\n",
            "step: 4080/10000...  loss: 4.7564...  0.1023 sec/batch\n",
            "step: 4090/10000...  loss: 4.7210...  0.0903 sec/batch\n",
            "step: 4100/10000...  loss: 4.6307...  0.0956 sec/batch\n",
            "step: 4110/10000...  loss: 4.6292...  0.0900 sec/batch\n",
            "step: 4120/10000...  loss: 4.7525...  0.1176 sec/batch\n",
            "step: 4130/10000...  loss: 4.5470...  0.0926 sec/batch\n",
            "step: 4140/10000...  loss: 4.8272...  0.0950 sec/batch\n",
            "step: 4150/10000...  loss: 4.7804...  0.0955 sec/batch\n",
            "step: 4160/10000...  loss: 4.6830...  0.0922 sec/batch\n",
            "step: 4170/10000...  loss: 4.7192...  0.0849 sec/batch\n",
            "step: 4180/10000...  loss: 4.7815...  0.1003 sec/batch\n",
            "step: 4190/10000...  loss: 4.7570...  0.0876 sec/batch\n",
            "step: 4200/10000...  loss: 4.6008...  0.0877 sec/batch\n",
            "step: 4210/10000...  loss: 4.7728...  0.0940 sec/batch\n",
            "step: 4220/10000...  loss: 4.6273...  0.0935 sec/batch\n",
            "step: 4230/10000...  loss: 4.5675...  0.0893 sec/batch\n",
            "step: 4240/10000...  loss: 4.7880...  0.1010 sec/batch\n",
            "step: 4250/10000...  loss: 4.6212...  0.0859 sec/batch\n",
            "step: 4260/10000...  loss: 4.7433...  0.0921 sec/batch\n",
            "step: 4270/10000...  loss: 4.7738...  0.0911 sec/batch\n",
            "step: 4280/10000...  loss: 4.6091...  0.1075 sec/batch\n",
            "step: 4290/10000...  loss: 4.6246...  0.0925 sec/batch\n",
            "step: 4300/10000...  loss: 4.6371...  0.0859 sec/batch\n",
            "step: 4310/10000...  loss: 4.6741...  0.0936 sec/batch\n",
            "step: 4320/10000...  loss: 4.7098...  0.1090 sec/batch\n",
            "step: 4330/10000...  loss: 4.7672...  0.1035 sec/batch\n",
            "step: 4340/10000...  loss: 4.6954...  0.0943 sec/batch\n",
            "step: 4350/10000...  loss: 4.6830...  0.0919 sec/batch\n",
            "step: 4360/10000...  loss: 4.6687...  0.0910 sec/batch\n",
            "step: 4370/10000...  loss: 4.6437...  0.0900 sec/batch\n",
            "step: 4380/10000...  loss: 4.6904...  0.0898 sec/batch\n",
            "step: 4390/10000...  loss: 4.6427...  0.1031 sec/batch\n",
            "step: 4400/10000...  loss: 4.7107...  0.0971 sec/batch\n",
            "step: 4410/10000...  loss: 4.5591...  0.0909 sec/batch\n",
            "step: 4420/10000...  loss: 4.7560...  0.0916 sec/batch\n",
            "step: 4430/10000...  loss: 4.7472...  0.1028 sec/batch\n",
            "step: 4440/10000...  loss: 4.7109...  0.0911 sec/batch\n",
            "step: 4450/10000...  loss: 4.6011...  0.0905 sec/batch\n",
            "step: 4460/10000...  loss: 4.6392...  0.0895 sec/batch\n",
            "step: 4470/10000...  loss: 4.7166...  0.1053 sec/batch\n",
            "step: 4480/10000...  loss: 4.5903...  0.0876 sec/batch\n",
            "step: 4490/10000...  loss: 4.7669...  0.0903 sec/batch\n",
            "step: 4500/10000...  loss: 4.7892...  0.0878 sec/batch\n",
            "step: 4510/10000...  loss: 4.6093...  0.0887 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 4520/10000...  loss: 4.6985...  0.0857 sec/batch\n",
            "step: 4530/10000...  loss: 4.5797...  0.0977 sec/batch\n",
            "step: 4540/10000...  loss: 4.5700...  0.0892 sec/batch\n",
            "step: 4550/10000...  loss: 4.7514...  0.0870 sec/batch\n",
            "step: 4560/10000...  loss: 4.6938...  0.0877 sec/batch\n",
            "step: 4570/10000...  loss: 4.7540...  0.0944 sec/batch\n",
            "step: 4580/10000...  loss: 4.7458...  0.1007 sec/batch\n",
            "step: 4590/10000...  loss: 4.7689...  0.0900 sec/batch\n",
            "step: 4600/10000...  loss: 4.7276...  0.1066 sec/batch\n",
            "step: 4610/10000...  loss: 4.8688...  0.1022 sec/batch\n",
            "step: 4620/10000...  loss: 4.7893...  0.0998 sec/batch\n",
            "step: 4630/10000...  loss: 4.7246...  0.0856 sec/batch\n",
            "step: 4640/10000...  loss: 4.6459...  0.0909 sec/batch\n",
            "step: 4650/10000...  loss: 4.6672...  0.0947 sec/batch\n",
            "step: 4660/10000...  loss: 4.8118...  0.0982 sec/batch\n",
            "step: 4670/10000...  loss: 4.7602...  0.0880 sec/batch\n",
            "step: 4680/10000...  loss: 4.7734...  0.0945 sec/batch\n",
            "step: 4690/10000...  loss: 4.6469...  0.0889 sec/batch\n",
            "step: 4700/10000...  loss: 4.6087...  0.0979 sec/batch\n",
            "step: 4710/10000...  loss: 4.6783...  0.0904 sec/batch\n",
            "step: 4720/10000...  loss: 4.6200...  0.0898 sec/batch\n",
            "step: 4730/10000...  loss: 4.7148...  0.0892 sec/batch\n",
            "step: 4740/10000...  loss: 4.7254...  0.0956 sec/batch\n",
            "step: 4750/10000...  loss: 4.6386...  0.0905 sec/batch\n",
            "step: 4760/10000...  loss: 4.6364...  0.0933 sec/batch\n",
            "step: 4770/10000...  loss: 4.7527...  0.0902 sec/batch\n",
            "step: 4780/10000...  loss: 4.6127...  0.0919 sec/batch\n",
            "step: 4790/10000...  loss: 4.7778...  0.1061 sec/batch\n",
            "step: 4800/10000...  loss: 4.6662...  0.0967 sec/batch\n",
            "step: 4810/10000...  loss: 4.6704...  0.0901 sec/batch\n",
            "step: 4820/10000...  loss: 4.6282...  0.1095 sec/batch\n",
            "step: 4830/10000...  loss: 4.6901...  0.0862 sec/batch\n",
            "step: 4840/10000...  loss: 4.7544...  0.0886 sec/batch\n",
            "step: 4850/10000...  loss: 4.7387...  0.0906 sec/batch\n",
            "step: 4860/10000...  loss: 4.7323...  0.1032 sec/batch\n",
            "step: 4870/10000...  loss: 4.7135...  0.0973 sec/batch\n",
            "step: 4880/10000...  loss: 4.8619...  0.1064 sec/batch\n",
            "step: 4890/10000...  loss: 4.6789...  0.0903 sec/batch\n",
            "step: 4900/10000...  loss: 4.6440...  0.0866 sec/batch\n",
            "step: 4910/10000...  loss: 4.7746...  0.0829 sec/batch\n",
            "step: 4920/10000...  loss: 4.5888...  0.0887 sec/batch\n",
            "step: 4930/10000...  loss: 4.6040...  0.0820 sec/batch\n",
            "step: 4940/10000...  loss: 4.8002...  0.0980 sec/batch\n",
            "step: 4950/10000...  loss: 4.7467...  0.0879 sec/batch\n",
            "step: 4960/10000...  loss: 4.7231...  0.0962 sec/batch\n",
            "step: 4970/10000...  loss: 4.6522...  0.0907 sec/batch\n",
            "step: 4980/10000...  loss: 4.6948...  0.0882 sec/batch\n",
            "step: 4990/10000...  loss: 4.7467...  0.0921 sec/batch\n",
            "step: 5000/10000...  loss: 4.7935...  0.0980 sec/batch\n",
            "step: 5010/10000...  loss: 4.7815...  0.1115 sec/batch\n",
            "step: 5020/10000...  loss: 4.5564...  0.0918 sec/batch\n",
            "step: 5030/10000...  loss: 4.8385...  0.0922 sec/batch\n",
            "step: 5040/10000...  loss: 4.8235...  0.0863 sec/batch\n",
            "step: 5050/10000...  loss: 4.8345...  0.0959 sec/batch\n",
            "step: 5060/10000...  loss: 4.7516...  0.1041 sec/batch\n",
            "step: 5070/10000...  loss: 4.6342...  0.0903 sec/batch\n",
            "step: 5080/10000...  loss: 4.7084...  0.0898 sec/batch\n",
            "step: 5090/10000...  loss: 4.7533...  0.1166 sec/batch\n",
            "step: 5100/10000...  loss: 4.5916...  0.0834 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 5110/10000...  loss: 4.6774...  0.0931 sec/batch\n",
            "step: 5120/10000...  loss: 4.6972...  0.0866 sec/batch\n",
            "step: 5130/10000...  loss: 4.4206...  0.0909 sec/batch\n",
            "step: 5140/10000...  loss: 4.6413...  0.0917 sec/batch\n",
            "step: 5150/10000...  loss: 4.5769...  0.0910 sec/batch\n",
            "step: 5160/10000...  loss: 4.6238...  0.0887 sec/batch\n",
            "step: 5170/10000...  loss: 4.7923...  0.0847 sec/batch\n",
            "step: 5180/10000...  loss: 4.6505...  0.0890 sec/batch\n",
            "step: 5190/10000...  loss: 4.6294...  0.0882 sec/batch\n",
            "step: 5200/10000...  loss: 4.6971...  0.0916 sec/batch\n",
            "step: 5210/10000...  loss: 4.5712...  0.0966 sec/batch\n",
            "step: 5220/10000...  loss: 4.6433...  0.0856 sec/batch\n",
            "step: 5230/10000...  loss: 4.7559...  0.1020 sec/batch\n",
            "step: 5240/10000...  loss: 4.7443...  0.0904 sec/batch\n",
            "step: 5250/10000...  loss: 4.6246...  0.0950 sec/batch\n",
            "step: 5260/10000...  loss: 4.5883...  0.0952 sec/batch\n",
            "step: 5270/10000...  loss: 4.7359...  0.0878 sec/batch\n",
            "step: 5280/10000...  loss: 4.6483...  0.0919 sec/batch\n",
            "step: 5290/10000...  loss: 4.6389...  0.1030 sec/batch\n",
            "step: 5300/10000...  loss: 4.6616...  0.1181 sec/batch\n",
            "step: 5310/10000...  loss: 4.7498...  0.0914 sec/batch\n",
            "step: 5320/10000...  loss: 4.7577...  0.0953 sec/batch\n",
            "step: 5330/10000...  loss: 4.5319...  0.0893 sec/batch\n",
            "step: 5340/10000...  loss: 4.5595...  0.1074 sec/batch\n",
            "step: 5350/10000...  loss: 4.6080...  0.0930 sec/batch\n",
            "step: 5360/10000...  loss: 4.6943...  0.0914 sec/batch\n",
            "step: 5370/10000...  loss: 4.6977...  0.0850 sec/batch\n",
            "step: 5380/10000...  loss: 4.6173...  0.1102 sec/batch\n",
            "step: 5390/10000...  loss: 4.6952...  0.0865 sec/batch\n",
            "step: 5400/10000...  loss: 4.4908...  0.0898 sec/batch\n",
            "step: 5410/10000...  loss: 4.6203...  0.0969 sec/batch\n",
            "step: 5420/10000...  loss: 4.6377...  0.1122 sec/batch\n",
            "step: 5430/10000...  loss: 4.6130...  0.0865 sec/batch\n",
            "step: 5440/10000...  loss: 4.6470...  0.0969 sec/batch\n",
            "step: 5450/10000...  loss: 4.6662...  0.0868 sec/batch\n",
            "step: 5460/10000...  loss: 4.5709...  0.1221 sec/batch\n",
            "step: 5470/10000...  loss: 4.6388...  0.0984 sec/batch\n",
            "step: 5480/10000...  loss: 4.6849...  0.1082 sec/batch\n",
            "step: 5490/10000...  loss: 4.7545...  0.0963 sec/batch\n",
            "step: 5500/10000...  loss: 4.6420...  0.0852 sec/batch\n",
            "step: 5510/10000...  loss: 4.6065...  0.0880 sec/batch\n",
            "step: 5520/10000...  loss: 4.5073...  0.0968 sec/batch\n",
            "step: 5530/10000...  loss: 4.6393...  0.0909 sec/batch\n",
            "step: 5540/10000...  loss: 4.6503...  0.0910 sec/batch\n",
            "step: 5550/10000...  loss: 4.7336...  0.0887 sec/batch\n",
            "step: 5560/10000...  loss: 4.6163...  0.0943 sec/batch\n",
            "step: 5570/10000...  loss: 4.5854...  0.0902 sec/batch\n",
            "step: 5580/10000...  loss: 4.5102...  0.0913 sec/batch\n",
            "step: 5590/10000...  loss: 4.7511...  0.0932 sec/batch\n",
            "step: 5600/10000...  loss: 4.6173...  0.0896 sec/batch\n",
            "step: 5610/10000...  loss: 4.6083...  0.1029 sec/batch\n",
            "step: 5620/10000...  loss: 4.6656...  0.0987 sec/batch\n",
            "step: 5630/10000...  loss: 4.7015...  0.0968 sec/batch\n",
            "step: 5640/10000...  loss: 4.5566...  0.0914 sec/batch\n",
            "step: 5650/10000...  loss: 4.5943...  0.0907 sec/batch\n",
            "step: 5660/10000...  loss: 4.6382...  0.0873 sec/batch\n",
            "step: 5670/10000...  loss: 4.7002...  0.0894 sec/batch\n",
            "step: 5680/10000...  loss: 4.6437...  0.0911 sec/batch\n",
            "step: 5690/10000...  loss: 4.7324...  0.0962 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 5700/10000...  loss: 4.6028...  0.0939 sec/batch\n",
            "step: 5710/10000...  loss: 4.6851...  0.0929 sec/batch\n",
            "step: 5720/10000...  loss: 4.6200...  0.0869 sec/batch\n",
            "step: 5730/10000...  loss: 4.7576...  0.0920 sec/batch\n",
            "step: 5740/10000...  loss: 4.7400...  0.0889 sec/batch\n",
            "step: 5750/10000...  loss: 4.8400...  0.1121 sec/batch\n",
            "step: 5760/10000...  loss: 4.6998...  0.0989 sec/batch\n",
            "step: 5770/10000...  loss: 4.6979...  0.0908 sec/batch\n",
            "step: 5780/10000...  loss: 4.6627...  0.0895 sec/batch\n",
            "step: 5790/10000...  loss: 4.6360...  0.1020 sec/batch\n",
            "step: 5800/10000...  loss: 4.6108...  0.0928 sec/batch\n",
            "step: 5810/10000...  loss: 4.6418...  0.0840 sec/batch\n",
            "step: 5820/10000...  loss: 4.6132...  0.0872 sec/batch\n",
            "step: 5830/10000...  loss: 4.6025...  0.1036 sec/batch\n",
            "step: 5840/10000...  loss: 4.7183...  0.0903 sec/batch\n",
            "step: 5850/10000...  loss: 4.6383...  0.0876 sec/batch\n",
            "step: 5860/10000...  loss: 4.6912...  0.0931 sec/batch\n",
            "step: 5870/10000...  loss: 4.6855...  0.1182 sec/batch\n",
            "step: 5880/10000...  loss: 4.6500...  0.0976 sec/batch\n",
            "step: 5890/10000...  loss: 4.6409...  0.0872 sec/batch\n",
            "step: 5900/10000...  loss: 4.6480...  0.0884 sec/batch\n",
            "step: 5910/10000...  loss: 4.7292...  0.0982 sec/batch\n",
            "step: 5920/10000...  loss: 4.7184...  0.0924 sec/batch\n",
            "step: 5930/10000...  loss: 4.6348...  0.1074 sec/batch\n",
            "step: 5940/10000...  loss: 4.7059...  0.0875 sec/batch\n",
            "step: 5950/10000...  loss: 4.6917...  0.0936 sec/batch\n",
            "step: 5960/10000...  loss: 4.6176...  0.0922 sec/batch\n",
            "step: 5970/10000...  loss: 4.7509...  0.0896 sec/batch\n",
            "step: 5980/10000...  loss: 4.7417...  0.0908 sec/batch\n",
            "step: 5990/10000...  loss: 4.6811...  0.1039 sec/batch\n",
            "step: 6000/10000...  loss: 4.6937...  0.0883 sec/batch\n",
            "step: 6010/10000...  loss: 4.7284...  0.0904 sec/batch\n",
            "step: 6020/10000...  loss: 4.6517...  0.0871 sec/batch\n",
            "step: 6030/10000...  loss: 4.6006...  0.0886 sec/batch\n",
            "step: 6040/10000...  loss: 4.5722...  0.1102 sec/batch\n",
            "step: 6050/10000...  loss: 4.5422...  0.0884 sec/batch\n",
            "step: 6060/10000...  loss: 4.7869...  0.0926 sec/batch\n",
            "step: 6070/10000...  loss: 4.6350...  0.0993 sec/batch\n",
            "step: 6080/10000...  loss: 4.7717...  0.0898 sec/batch\n",
            "step: 6090/10000...  loss: 4.7230...  0.0935 sec/batch\n",
            "step: 6100/10000...  loss: 4.6998...  0.0927 sec/batch\n",
            "step: 6110/10000...  loss: 4.6467...  0.0918 sec/batch\n",
            "step: 6120/10000...  loss: 4.7526...  0.0954 sec/batch\n",
            "step: 6130/10000...  loss: 4.7436...  0.0867 sec/batch\n",
            "step: 6140/10000...  loss: 4.7009...  0.0884 sec/batch\n",
            "step: 6150/10000...  loss: 4.5120...  0.0913 sec/batch\n",
            "step: 6160/10000...  loss: 4.6679...  0.0890 sec/batch\n",
            "step: 6170/10000...  loss: 4.6094...  0.0873 sec/batch\n",
            "step: 6180/10000...  loss: 4.6189...  0.0953 sec/batch\n",
            "step: 6190/10000...  loss: 4.7088...  0.0897 sec/batch\n",
            "step: 6200/10000...  loss: 4.5925...  0.0891 sec/batch\n",
            "step: 6210/10000...  loss: 4.6124...  0.0905 sec/batch\n",
            "step: 6220/10000...  loss: 4.6974...  0.1009 sec/batch\n",
            "step: 6230/10000...  loss: 4.7231...  0.0896 sec/batch\n",
            "step: 6240/10000...  loss: 4.6952...  0.0936 sec/batch\n",
            "step: 6250/10000...  loss: 4.5462...  0.1081 sec/batch\n",
            "step: 6260/10000...  loss: 4.6077...  0.0891 sec/batch\n",
            "step: 6270/10000...  loss: 4.7436...  0.0921 sec/batch\n",
            "step: 6280/10000...  loss: 4.6818...  0.0903 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 6290/10000...  loss: 4.5901...  0.1084 sec/batch\n",
            "step: 6300/10000...  loss: 4.7541...  0.1072 sec/batch\n",
            "step: 6310/10000...  loss: 4.7399...  0.0894 sec/batch\n",
            "step: 6320/10000...  loss: 4.5785...  0.0890 sec/batch\n",
            "step: 6330/10000...  loss: 4.7056...  0.0879 sec/batch\n",
            "step: 6340/10000...  loss: 4.5118...  0.0934 sec/batch\n",
            "step: 6350/10000...  loss: 4.5934...  0.0876 sec/batch\n",
            "step: 6360/10000...  loss: 4.6276...  0.0945 sec/batch\n",
            "step: 6370/10000...  loss: 4.5988...  0.1086 sec/batch\n",
            "step: 6380/10000...  loss: 4.7031...  0.1007 sec/batch\n",
            "step: 6390/10000...  loss: 4.6180...  0.0938 sec/batch\n",
            "step: 6400/10000...  loss: 4.6781...  0.0887 sec/batch\n",
            "step: 6410/10000...  loss: 4.5657...  0.0960 sec/batch\n",
            "step: 6420/10000...  loss: 4.6993...  0.0880 sec/batch\n",
            "step: 6430/10000...  loss: 4.7189...  0.0907 sec/batch\n",
            "step: 6440/10000...  loss: 4.5817...  0.0863 sec/batch\n",
            "step: 6450/10000...  loss: 4.6847...  0.1100 sec/batch\n",
            "step: 6460/10000...  loss: 4.5981...  0.0890 sec/batch\n",
            "step: 6470/10000...  loss: 4.6016...  0.0897 sec/batch\n",
            "step: 6480/10000...  loss: 4.5638...  0.0877 sec/batch\n",
            "step: 6490/10000...  loss: 4.6858...  0.1036 sec/batch\n",
            "step: 6500/10000...  loss: 4.4613...  0.0893 sec/batch\n",
            "step: 6510/10000...  loss: 4.6037...  0.0906 sec/batch\n",
            "step: 6520/10000...  loss: 4.5960...  0.0873 sec/batch\n",
            "step: 6530/10000...  loss: 4.6400...  0.0944 sec/batch\n",
            "step: 6540/10000...  loss: 4.5454...  0.0908 sec/batch\n",
            "step: 6550/10000...  loss: 4.4611...  0.0840 sec/batch\n",
            "step: 6560/10000...  loss: 4.6385...  0.0871 sec/batch\n",
            "step: 6570/10000...  loss: 4.4945...  0.0898 sec/batch\n",
            "step: 6580/10000...  loss: 4.5617...  0.0860 sec/batch\n",
            "step: 6590/10000...  loss: 4.6809...  0.0940 sec/batch\n",
            "step: 6600/10000...  loss: 4.5555...  0.0897 sec/batch\n",
            "step: 6610/10000...  loss: 4.4528...  0.0863 sec/batch\n",
            "step: 6620/10000...  loss: 4.6082...  0.0901 sec/batch\n",
            "step: 6630/10000...  loss: 4.7801...  0.0941 sec/batch\n",
            "step: 6640/10000...  loss: 4.6948...  0.0959 sec/batch\n",
            "step: 6650/10000...  loss: 4.6163...  0.1107 sec/batch\n",
            "step: 6660/10000...  loss: 4.6134...  0.0918 sec/batch\n",
            "step: 6670/10000...  loss: 4.5979...  0.0978 sec/batch\n",
            "step: 6680/10000...  loss: 4.6256...  0.0894 sec/batch\n",
            "step: 6690/10000...  loss: 4.6180...  0.0840 sec/batch\n",
            "step: 6700/10000...  loss: 4.5030...  0.0921 sec/batch\n",
            "step: 6710/10000...  loss: 4.6326...  0.0883 sec/batch\n",
            "step: 6720/10000...  loss: 4.4899...  0.1009 sec/batch\n",
            "step: 6730/10000...  loss: 4.6855...  0.0853 sec/batch\n",
            "step: 6740/10000...  loss: 4.5870...  0.0922 sec/batch\n",
            "step: 6750/10000...  loss: 4.8134...  0.0976 sec/batch\n",
            "step: 6760/10000...  loss: 4.5962...  0.1018 sec/batch\n",
            "step: 6770/10000...  loss: 4.6327...  0.0840 sec/batch\n",
            "step: 6780/10000...  loss: 4.5311...  0.0978 sec/batch\n",
            "step: 6790/10000...  loss: 4.5845...  0.0935 sec/batch\n",
            "step: 6800/10000...  loss: 4.6039...  0.0884 sec/batch\n",
            "step: 6810/10000...  loss: 4.5224...  0.1000 sec/batch\n",
            "step: 6820/10000...  loss: 4.5950...  0.1131 sec/batch\n",
            "step: 6830/10000...  loss: 4.5994...  0.0946 sec/batch\n",
            "step: 6840/10000...  loss: 4.6660...  0.0908 sec/batch\n",
            "step: 6850/10000...  loss: 4.6079...  0.0883 sec/batch\n",
            "step: 6860/10000...  loss: 4.6590...  0.0880 sec/batch\n",
            "step: 6870/10000...  loss: 4.6126...  0.0923 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 6880/10000...  loss: 4.6539...  0.0939 sec/batch\n",
            "step: 6890/10000...  loss: 4.6775...  0.1067 sec/batch\n",
            "step: 6900/10000...  loss: 4.6485...  0.0933 sec/batch\n",
            "step: 6910/10000...  loss: 4.6610...  0.0915 sec/batch\n",
            "step: 6920/10000...  loss: 4.6436...  0.0937 sec/batch\n",
            "step: 6930/10000...  loss: 4.5383...  0.0873 sec/batch\n",
            "step: 6940/10000...  loss: 4.7345...  0.0908 sec/batch\n",
            "step: 6950/10000...  loss: 4.7277...  0.0989 sec/batch\n",
            "step: 6960/10000...  loss: 4.6551...  0.0889 sec/batch\n",
            "step: 6970/10000...  loss: 4.5980...  0.0877 sec/batch\n",
            "step: 6980/10000...  loss: 4.5870...  0.0902 sec/batch\n",
            "step: 6990/10000...  loss: 4.6365...  0.1051 sec/batch\n",
            "step: 7000/10000...  loss: 4.6720...  0.0932 sec/batch\n",
            "step: 7010/10000...  loss: 4.6554...  0.0949 sec/batch\n",
            "step: 7020/10000...  loss: 4.4978...  0.0923 sec/batch\n",
            "step: 7030/10000...  loss: 4.5188...  0.0933 sec/batch\n",
            "step: 7040/10000...  loss: 4.7094...  0.0964 sec/batch\n",
            "step: 7050/10000...  loss: 4.6812...  0.0973 sec/batch\n",
            "step: 7060/10000...  loss: 4.5269...  0.1001 sec/batch\n",
            "step: 7070/10000...  loss: 4.5391...  0.0905 sec/batch\n",
            "step: 7080/10000...  loss: 4.6300...  0.0973 sec/batch\n",
            "step: 7090/10000...  loss: 4.5960...  0.0893 sec/batch\n",
            "step: 7100/10000...  loss: 4.6308...  0.0908 sec/batch\n",
            "step: 7110/10000...  loss: 4.5511...  0.0961 sec/batch\n",
            "step: 7120/10000...  loss: 4.6733...  0.0929 sec/batch\n",
            "step: 7130/10000...  loss: 4.6295...  0.0873 sec/batch\n",
            "step: 7140/10000...  loss: 4.6619...  0.1061 sec/batch\n",
            "step: 7150/10000...  loss: 4.7400...  0.0925 sec/batch\n",
            "step: 7160/10000...  loss: 4.6165...  0.1088 sec/batch\n",
            "step: 7170/10000...  loss: 4.6516...  0.0893 sec/batch\n",
            "step: 7180/10000...  loss: 4.7484...  0.0981 sec/batch\n",
            "step: 7190/10000...  loss: 4.4948...  0.0927 sec/batch\n",
            "step: 7200/10000...  loss: 4.5394...  0.0911 sec/batch\n",
            "step: 7210/10000...  loss: 4.8308...  0.0855 sec/batch\n",
            "step: 7220/10000...  loss: 4.6820...  0.1014 sec/batch\n",
            "step: 7230/10000...  loss: 4.5807...  0.0855 sec/batch\n",
            "step: 7240/10000...  loss: 4.6651...  0.0926 sec/batch\n",
            "step: 7250/10000...  loss: 4.7415...  0.1143 sec/batch\n",
            "step: 7260/10000...  loss: 4.7141...  0.0975 sec/batch\n",
            "step: 7270/10000...  loss: 4.7324...  0.0928 sec/batch\n",
            "step: 7280/10000...  loss: 4.6216...  0.0900 sec/batch\n",
            "step: 7290/10000...  loss: 4.5137...  0.1328 sec/batch\n",
            "step: 7300/10000...  loss: 4.5967...  0.1049 sec/batch\n",
            "step: 7310/10000...  loss: 4.6654...  0.0894 sec/batch\n",
            "step: 7320/10000...  loss: 4.5759...  0.0984 sec/batch\n",
            "step: 7330/10000...  loss: 4.6317...  0.0945 sec/batch\n",
            "step: 7340/10000...  loss: 4.7221...  0.0984 sec/batch\n",
            "step: 7350/10000...  loss: 4.6853...  0.0912 sec/batch\n",
            "step: 7360/10000...  loss: 4.5253...  0.0891 sec/batch\n",
            "step: 7370/10000...  loss: 4.6267...  0.0941 sec/batch\n",
            "step: 7380/10000...  loss: 4.4240...  0.0874 sec/batch\n",
            "step: 7390/10000...  loss: 4.6061...  0.1020 sec/batch\n",
            "step: 7400/10000...  loss: 4.5282...  0.0910 sec/batch\n",
            "step: 7410/10000...  loss: 4.5467...  0.0886 sec/batch\n",
            "step: 7420/10000...  loss: 4.6002...  0.1008 sec/batch\n",
            "step: 7430/10000...  loss: 4.4848...  0.0904 sec/batch\n",
            "step: 7440/10000...  loss: 4.5564...  0.0907 sec/batch\n",
            "step: 7450/10000...  loss: 4.5614...  0.0900 sec/batch\n",
            "step: 7460/10000...  loss: 4.5751...  0.1006 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 7470/10000...  loss: 4.6160...  0.0866 sec/batch\n",
            "step: 7480/10000...  loss: 4.6221...  0.0878 sec/batch\n",
            "step: 7490/10000...  loss: 4.6254...  0.0910 sec/batch\n",
            "step: 7500/10000...  loss: 4.6454...  0.0878 sec/batch\n",
            "step: 7510/10000...  loss: 4.5263...  0.1010 sec/batch\n",
            "step: 7520/10000...  loss: 4.5824...  0.0903 sec/batch\n",
            "step: 7530/10000...  loss: 4.6218...  0.0929 sec/batch\n",
            "step: 7540/10000...  loss: 4.6059...  0.0910 sec/batch\n",
            "step: 7550/10000...  loss: 4.5480...  0.0873 sec/batch\n",
            "step: 7560/10000...  loss: 4.6316...  0.0883 sec/batch\n",
            "step: 7570/10000...  loss: 4.5782...  0.0913 sec/batch\n",
            "step: 7580/10000...  loss: 4.6705...  0.1015 sec/batch\n",
            "step: 7590/10000...  loss: 4.5632...  0.1172 sec/batch\n",
            "step: 7600/10000...  loss: 4.5502...  0.0916 sec/batch\n",
            "step: 7610/10000...  loss: 4.6325...  0.0910 sec/batch\n",
            "step: 7620/10000...  loss: 4.6143...  0.0948 sec/batch\n",
            "step: 7630/10000...  loss: 4.6401...  0.0925 sec/batch\n",
            "step: 7640/10000...  loss: 4.6058...  0.1083 sec/batch\n",
            "step: 7650/10000...  loss: 4.4045...  0.0996 sec/batch\n",
            "step: 7660/10000...  loss: 4.5775...  0.0940 sec/batch\n",
            "step: 7670/10000...  loss: 4.6791...  0.0939 sec/batch\n",
            "step: 7680/10000...  loss: 4.6021...  0.0928 sec/batch\n",
            "step: 7690/10000...  loss: 4.5756...  0.0899 sec/batch\n",
            "step: 7700/10000...  loss: 4.4690...  0.0910 sec/batch\n",
            "step: 7710/10000...  loss: 4.6216...  0.0899 sec/batch\n",
            "step: 7720/10000...  loss: 4.6908...  0.0988 sec/batch\n",
            "step: 7730/10000...  loss: 4.5388...  0.0891 sec/batch\n",
            "step: 7740/10000...  loss: 4.5490...  0.0874 sec/batch\n",
            "step: 7750/10000...  loss: 4.5206...  0.0953 sec/batch\n",
            "step: 7760/10000...  loss: 4.4812...  0.1113 sec/batch\n",
            "step: 7770/10000...  loss: 4.4927...  0.0841 sec/batch\n",
            "step: 7780/10000...  loss: 4.6073...  0.0890 sec/batch\n",
            "step: 7790/10000...  loss: 4.5739...  0.0853 sec/batch\n",
            "step: 7800/10000...  loss: 4.6212...  0.1287 sec/batch\n",
            "step: 7810/10000...  loss: 4.5563...  0.0959 sec/batch\n",
            "step: 7820/10000...  loss: 4.6082...  0.0930 sec/batch\n",
            "step: 7830/10000...  loss: 4.5427...  0.0909 sec/batch\n",
            "step: 7840/10000...  loss: 4.5607...  0.0859 sec/batch\n",
            "step: 7850/10000...  loss: 4.5356...  0.0906 sec/batch\n",
            "step: 7860/10000...  loss: 4.6555...  0.0875 sec/batch\n",
            "step: 7870/10000...  loss: 4.5419...  0.1003 sec/batch\n",
            "step: 7880/10000...  loss: 4.4669...  0.0913 sec/batch\n",
            "step: 7890/10000...  loss: 4.5111...  0.0868 sec/batch\n",
            "step: 7900/10000...  loss: 4.6000...  0.0914 sec/batch\n",
            "step: 7910/10000...  loss: 4.6142...  0.0989 sec/batch\n",
            "step: 7920/10000...  loss: 4.5968...  0.1066 sec/batch\n",
            "step: 7930/10000...  loss: 4.5989...  0.1106 sec/batch\n",
            "step: 7940/10000...  loss: 4.5994...  0.0925 sec/batch\n",
            "step: 7950/10000...  loss: 4.6041...  0.0914 sec/batch\n",
            "step: 7960/10000...  loss: 4.5507...  0.0928 sec/batch\n",
            "step: 7970/10000...  loss: 4.5354...  0.1031 sec/batch\n",
            "step: 7980/10000...  loss: 4.6406...  0.0917 sec/batch\n",
            "step: 7990/10000...  loss: 4.6410...  0.0936 sec/batch\n",
            "step: 8000/10000...  loss: 4.6625...  0.0883 sec/batch\n",
            "step: 8010/10000...  loss: 4.5772...  0.0904 sec/batch\n",
            "step: 8020/10000...  loss: 4.6812...  0.1082 sec/batch\n",
            "step: 8030/10000...  loss: 4.7502...  0.0968 sec/batch\n",
            "step: 8040/10000...  loss: 4.5747...  0.0926 sec/batch\n",
            "step: 8050/10000...  loss: 4.6265...  0.0851 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 8060/10000...  loss: 4.5926...  0.1017 sec/batch\n",
            "step: 8070/10000...  loss: 4.5954...  0.0900 sec/batch\n",
            "step: 8080/10000...  loss: 4.6970...  0.1143 sec/batch\n",
            "step: 8090/10000...  loss: 4.7825...  0.0875 sec/batch\n",
            "step: 8100/10000...  loss: 4.6735...  0.0941 sec/batch\n",
            "step: 8110/10000...  loss: 4.6434...  0.0970 sec/batch\n",
            "step: 8120/10000...  loss: 4.7077...  0.0978 sec/batch\n",
            "step: 8130/10000...  loss: 4.5738...  0.0846 sec/batch\n",
            "step: 8140/10000...  loss: 4.7203...  0.0979 sec/batch\n",
            "step: 8150/10000...  loss: 4.6812...  0.0868 sec/batch\n",
            "step: 8160/10000...  loss: 4.6029...  0.0933 sec/batch\n",
            "step: 8170/10000...  loss: 4.5119...  0.0922 sec/batch\n",
            "step: 8180/10000...  loss: 4.6293...  0.0868 sec/batch\n",
            "step: 8190/10000...  loss: 4.6079...  0.0859 sec/batch\n",
            "step: 8200/10000...  loss: 4.6418...  0.0947 sec/batch\n",
            "step: 8210/10000...  loss: 4.6154...  0.0911 sec/batch\n",
            "step: 8220/10000...  loss: 4.7005...  0.0938 sec/batch\n",
            "step: 8230/10000...  loss: 4.6382...  0.1128 sec/batch\n",
            "step: 8240/10000...  loss: 4.6328...  0.0882 sec/batch\n",
            "step: 8250/10000...  loss: 4.5354...  0.1077 sec/batch\n",
            "step: 8260/10000...  loss: 4.6102...  0.0847 sec/batch\n",
            "step: 8270/10000...  loss: 4.6150...  0.0912 sec/batch\n",
            "step: 8280/10000...  loss: 4.5451...  0.0893 sec/batch\n",
            "step: 8290/10000...  loss: 4.5671...  0.0899 sec/batch\n",
            "step: 8300/10000...  loss: 4.4917...  0.0921 sec/batch\n",
            "step: 8310/10000...  loss: 4.6544...  0.1101 sec/batch\n",
            "step: 8320/10000...  loss: 4.6283...  0.0924 sec/batch\n",
            "step: 8330/10000...  loss: 4.6683...  0.0947 sec/batch\n",
            "step: 8340/10000...  loss: 4.6915...  0.0912 sec/batch\n",
            "step: 8350/10000...  loss: 4.7295...  0.1158 sec/batch\n",
            "step: 8360/10000...  loss: 4.6201...  0.0917 sec/batch\n",
            "step: 8370/10000...  loss: 4.6730...  0.0885 sec/batch\n",
            "step: 8380/10000...  loss: 4.6421...  0.0871 sec/batch\n",
            "step: 8390/10000...  loss: 4.4909...  0.0889 sec/batch\n",
            "step: 8400/10000...  loss: 4.6271...  0.0869 sec/batch\n",
            "step: 8410/10000...  loss: 4.6707...  0.0937 sec/batch\n",
            "step: 8420/10000...  loss: 4.5780...  0.0866 sec/batch\n",
            "step: 8430/10000...  loss: 4.7211...  0.0922 sec/batch\n",
            "step: 8440/10000...  loss: 4.6390...  0.0926 sec/batch\n",
            "step: 8450/10000...  loss: 4.7050...  0.0907 sec/batch\n",
            "step: 8460/10000...  loss: 4.4904...  0.0905 sec/batch\n",
            "step: 8470/10000...  loss: 4.6536...  0.0855 sec/batch\n",
            "step: 8480/10000...  loss: 4.4162...  0.0936 sec/batch\n",
            "step: 8490/10000...  loss: 4.5931...  0.0899 sec/batch\n",
            "step: 8500/10000...  loss: 4.5700...  0.0917 sec/batch\n",
            "step: 8510/10000...  loss: 4.6493...  0.0881 sec/batch\n",
            "step: 8520/10000...  loss: 4.4857...  0.1094 sec/batch\n",
            "step: 8530/10000...  loss: 4.5074...  0.0868 sec/batch\n",
            "step: 8540/10000...  loss: 4.6752...  0.1004 sec/batch\n",
            "step: 8550/10000...  loss: 4.6573...  0.0950 sec/batch\n",
            "step: 8560/10000...  loss: 4.4968...  0.1142 sec/batch\n",
            "step: 8570/10000...  loss: 4.7074...  0.0970 sec/batch\n",
            "step: 8580/10000...  loss: 4.6042...  0.0972 sec/batch\n",
            "step: 8590/10000...  loss: 4.5797...  0.0905 sec/batch\n",
            "step: 8600/10000...  loss: 4.7578...  0.0969 sec/batch\n",
            "step: 8610/10000...  loss: 4.7127...  0.0945 sec/batch\n",
            "step: 8620/10000...  loss: 4.5538...  0.0855 sec/batch\n",
            "step: 8630/10000...  loss: 4.5837...  0.0893 sec/batch\n",
            "step: 8640/10000...  loss: 4.6992...  0.0930 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 8650/10000...  loss: 4.6277...  0.0879 sec/batch\n",
            "step: 8660/10000...  loss: 4.5336...  0.0948 sec/batch\n",
            "step: 8670/10000...  loss: 4.5748...  0.0872 sec/batch\n",
            "step: 8680/10000...  loss: 4.5794...  0.0837 sec/batch\n",
            "step: 8690/10000...  loss: 4.4788...  0.1127 sec/batch\n",
            "step: 8700/10000...  loss: 4.5349...  0.0896 sec/batch\n",
            "step: 8710/10000...  loss: 4.7088...  0.1018 sec/batch\n",
            "step: 8720/10000...  loss: 4.5753...  0.0969 sec/batch\n",
            "step: 8730/10000...  loss: 4.5090...  0.0965 sec/batch\n",
            "step: 8740/10000...  loss: 4.5945...  0.0921 sec/batch\n",
            "step: 8750/10000...  loss: 4.6122...  0.0879 sec/batch\n",
            "step: 8760/10000...  loss: 4.4623...  0.0895 sec/batch\n",
            "step: 8770/10000...  loss: 4.5883...  0.0935 sec/batch\n",
            "step: 8780/10000...  loss: 4.5552...  0.0921 sec/batch\n",
            "step: 8790/10000...  loss: 4.5097...  0.0928 sec/batch\n",
            "step: 8800/10000...  loss: 4.5820...  0.0908 sec/batch\n",
            "step: 8810/10000...  loss: 4.4862...  0.0887 sec/batch\n",
            "step: 8820/10000...  loss: 4.5168...  0.0972 sec/batch\n",
            "step: 8830/10000...  loss: 4.4936...  0.0950 sec/batch\n",
            "step: 8840/10000...  loss: 4.6276...  0.0857 sec/batch\n",
            "step: 8850/10000...  loss: 4.6495...  0.1072 sec/batch\n",
            "step: 8860/10000...  loss: 4.5880...  0.0938 sec/batch\n",
            "step: 8870/10000...  loss: 4.5413...  0.0860 sec/batch\n",
            "step: 8880/10000...  loss: 4.6304...  0.0915 sec/batch\n",
            "step: 8890/10000...  loss: 4.5779...  0.0991 sec/batch\n",
            "step: 8900/10000...  loss: 4.5407...  0.0906 sec/batch\n",
            "step: 8910/10000...  loss: 4.4692...  0.0933 sec/batch\n",
            "step: 8920/10000...  loss: 4.6857...  0.0873 sec/batch\n",
            "step: 8930/10000...  loss: 4.6261...  0.1011 sec/batch\n",
            "step: 8940/10000...  loss: 4.6471...  0.0908 sec/batch\n",
            "step: 8950/10000...  loss: 4.6660...  0.0933 sec/batch\n",
            "step: 8960/10000...  loss: 4.5239...  0.0894 sec/batch\n",
            "step: 8970/10000...  loss: 4.5337...  0.0821 sec/batch\n",
            "step: 8980/10000...  loss: 4.5771...  0.0968 sec/batch\n",
            "step: 8990/10000...  loss: 4.5932...  0.0896 sec/batch\n",
            "step: 9000/10000...  loss: 4.7003...  0.0936 sec/batch\n",
            "step: 9010/10000...  loss: 4.5460...  0.0839 sec/batch\n",
            "step: 9020/10000...  loss: 4.5863...  0.0935 sec/batch\n",
            "step: 9030/10000...  loss: 4.4787...  0.0893 sec/batch\n",
            "step: 9040/10000...  loss: 4.4798...  0.0944 sec/batch\n",
            "step: 9050/10000...  loss: 4.6261...  0.0911 sec/batch\n",
            "step: 9060/10000...  loss: 4.5732...  0.1006 sec/batch\n",
            "step: 9070/10000...  loss: 4.5538...  0.1017 sec/batch\n",
            "step: 9080/10000...  loss: 4.5323...  0.1064 sec/batch\n",
            "step: 9090/10000...  loss: 4.5977...  0.1024 sec/batch\n",
            "step: 9100/10000...  loss: 4.6434...  0.0974 sec/batch\n",
            "step: 9110/10000...  loss: 4.5109...  0.0849 sec/batch\n",
            "step: 9120/10000...  loss: 4.4829...  0.0866 sec/batch\n",
            "step: 9130/10000...  loss: 4.7870...  0.0878 sec/batch\n",
            "step: 9140/10000...  loss: 4.6287...  0.0879 sec/batch\n",
            "step: 9150/10000...  loss: 4.6560...  0.0914 sec/batch\n",
            "step: 9160/10000...  loss: 4.7458...  0.0951 sec/batch\n",
            "step: 9170/10000...  loss: 4.5893...  0.0907 sec/batch\n",
            "step: 9180/10000...  loss: 4.5783...  0.0949 sec/batch\n",
            "step: 9190/10000...  loss: 4.5979...  0.1285 sec/batch\n",
            "step: 9200/10000...  loss: 4.5847...  0.0926 sec/batch\n",
            "step: 9210/10000...  loss: 4.6365...  0.0977 sec/batch\n",
            "step: 9220/10000...  loss: 4.5703...  0.0923 sec/batch\n",
            "step: 9230/10000...  loss: 4.6421...  0.0929 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 9240/10000...  loss: 4.5873...  0.0862 sec/batch\n",
            "step: 9250/10000...  loss: 4.5717...  0.0920 sec/batch\n",
            "step: 9260/10000...  loss: 4.5144...  0.0876 sec/batch\n",
            "step: 9270/10000...  loss: 4.4788...  0.0903 sec/batch\n",
            "step: 9280/10000...  loss: 4.6626...  0.0982 sec/batch\n",
            "step: 9290/10000...  loss: 4.4947...  0.0866 sec/batch\n",
            "step: 9300/10000...  loss: 4.5811...  0.0882 sec/batch\n",
            "step: 9310/10000...  loss: 4.6134...  0.0947 sec/batch\n",
            "step: 9320/10000...  loss: 4.4892...  0.1094 sec/batch\n",
            "step: 9330/10000...  loss: 4.4825...  0.0935 sec/batch\n",
            "step: 9340/10000...  loss: 4.6810...  0.1011 sec/batch\n",
            "step: 9350/10000...  loss: 4.5688...  0.0911 sec/batch\n",
            "step: 9360/10000...  loss: 4.6239...  0.0952 sec/batch\n",
            "step: 9370/10000...  loss: 4.6082...  0.0892 sec/batch\n",
            "step: 9380/10000...  loss: 4.8793...  0.0949 sec/batch\n",
            "step: 9390/10000...  loss: 4.6597...  0.0944 sec/batch\n",
            "step: 9400/10000...  loss: 4.6949...  0.0973 sec/batch\n",
            "step: 9410/10000...  loss: 4.5442...  0.0915 sec/batch\n",
            "step: 9420/10000...  loss: 4.5411...  0.0954 sec/batch\n",
            "step: 9430/10000...  loss: 4.6218...  0.0839 sec/batch\n",
            "step: 9440/10000...  loss: 4.6181...  0.0893 sec/batch\n",
            "step: 9450/10000...  loss: 4.5185...  0.0955 sec/batch\n",
            "step: 9460/10000...  loss: 4.7007...  0.0959 sec/batch\n",
            "step: 9470/10000...  loss: 4.6732...  0.1050 sec/batch\n",
            "step: 9480/10000...  loss: 4.6962...  0.1092 sec/batch\n",
            "step: 9490/10000...  loss: 4.5258...  0.1190 sec/batch\n",
            "step: 9500/10000...  loss: 4.5217...  0.0931 sec/batch\n",
            "step: 9510/10000...  loss: 4.6891...  0.0932 sec/batch\n",
            "step: 9520/10000...  loss: 4.6916...  0.0866 sec/batch\n",
            "step: 9530/10000...  loss: 4.5395...  0.1104 sec/batch\n",
            "step: 9540/10000...  loss: 4.6270...  0.1037 sec/batch\n",
            "step: 9550/10000...  loss: 4.6109...  0.1054 sec/batch\n",
            "step: 9560/10000...  loss: 4.6458...  0.0912 sec/batch\n",
            "step: 9570/10000...  loss: 4.6489...  0.0877 sec/batch\n",
            "step: 9580/10000...  loss: 4.6561...  0.1015 sec/batch\n",
            "step: 9590/10000...  loss: 4.7778...  0.1012 sec/batch\n",
            "step: 9600/10000...  loss: 4.6169...  0.0973 sec/batch\n",
            "step: 9610/10000...  loss: 4.5508...  0.0973 sec/batch\n",
            "step: 9620/10000...  loss: 4.6320...  0.1003 sec/batch\n",
            "step: 9630/10000...  loss: 4.4132...  0.1281 sec/batch\n",
            "step: 9640/10000...  loss: 4.6708...  0.1090 sec/batch\n",
            "step: 9650/10000...  loss: 4.5088...  0.1056 sec/batch\n",
            "step: 9660/10000...  loss: 4.4661...  0.1146 sec/batch\n",
            "step: 9670/10000...  loss: 4.5559...  0.1078 sec/batch\n",
            "step: 9680/10000...  loss: 4.6313...  0.1068 sec/batch\n",
            "step: 9690/10000...  loss: 4.4804...  0.0942 sec/batch\n",
            "step: 9700/10000...  loss: 4.7342...  0.0858 sec/batch\n",
            "step: 9710/10000...  loss: 4.5631...  0.0925 sec/batch\n",
            "step: 9720/10000...  loss: 4.5038...  0.0935 sec/batch\n",
            "step: 9730/10000...  loss: 4.6602...  0.0902 sec/batch\n",
            "step: 9740/10000...  loss: 4.5223...  0.0892 sec/batch\n",
            "step: 9750/10000...  loss: 4.6234...  0.1100 sec/batch\n",
            "step: 9760/10000...  loss: 4.6627...  0.0969 sec/batch\n",
            "step: 9770/10000...  loss: 4.5483...  0.0852 sec/batch\n",
            "step: 9780/10000...  loss: 4.5161...  0.0908 sec/batch\n",
            "step: 9790/10000...  loss: 4.4137...  0.0879 sec/batch\n",
            "step: 9800/10000...  loss: 4.5930...  0.1010 sec/batch\n",
            "step: 9810/10000...  loss: 4.5248...  0.0950 sec/batch\n",
            "step: 9820/10000...  loss: 4.4703...  0.0871 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 9830/10000...  loss: 4.5538...  0.1049 sec/batch\n",
            "step: 9840/10000...  loss: 4.5746...  0.0896 sec/batch\n",
            "step: 9850/10000...  loss: 4.6019...  0.0980 sec/batch\n",
            "step: 9860/10000...  loss: 4.6320...  0.0962 sec/batch\n",
            "step: 9870/10000...  loss: 4.4107...  0.0946 sec/batch\n",
            "step: 9880/10000...  loss: 4.4157...  0.1076 sec/batch\n",
            "step: 9890/10000...  loss: 4.5488...  0.1011 sec/batch\n",
            "step: 9900/10000...  loss: 4.5771...  0.0882 sec/batch\n",
            "step: 9910/10000...  loss: 4.4354...  0.0886 sec/batch\n",
            "step: 9920/10000...  loss: 4.5482...  0.0934 sec/batch\n",
            "step: 9930/10000...  loss: 4.4400...  0.0935 sec/batch\n",
            "step: 9940/10000...  loss: 4.5097...  0.0948 sec/batch\n",
            "step: 9950/10000...  loss: 4.4637...  0.0865 sec/batch\n",
            "step: 9960/10000...  loss: 4.5473...  0.0890 sec/batch\n",
            "step: 9970/10000...  loss: 4.5969...  0.1137 sec/batch\n",
            "step: 9980/10000...  loss: 4.5175...  0.0815 sec/batch\n",
            "step: 9990/10000...  loss: 4.4894...  0.0850 sec/batch\n",
            "step: 10000/10000...  loss: 4.5577...  0.0850 sec/batch\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "E9QVfqWeSjFq",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 759
        },
        "outputId": "01ef50f8-dcb2-443c-a2d3-13ec887e1550"
      },
      "cell_type": "code",
      "source": [
        "!cd Char-RNN-TensorFlow && python sample.py  --use_embedding --converter_path model/poetry/converter.pkl  --checkpoint_path model/poetry/ --max_length 300"
      ],
      "execution_count": 10,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:From /content/Char-RNN-TensorFlow/model.py:93: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\r\n",
            "Instructions for updating:\r\n",
            "\r\n",
            "Future major versions of TensorFlow will allow gradients to flow\r\n",
            "into the labels input on backprop by default.\r\n",
            "\r\n",
            "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\r\n",
            "\n",
            "2018-07-28 14:48:41.426060: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2018-07-28 14:48:41.426702: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \n",
            "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\n",
            "pciBusID: 0000:00:04.0\n",
            "totalMemory: 11.17GiB freeMemory: 11.10GiB\n",
            "2018-07-28 14:48:41.426772: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\n",
            "2018-07-28 14:48:41.893570: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2018-07-28 14:48:41.893656: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958]      0 \n",
            "2018-07-28 14:48:41.893701: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0:   N \n",
            "2018-07-28 14:48:41.894148: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10763 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)\n",
            "Restored from: model/poetry/model-10000\n",
            "换<unk><unk><unk><unk>。\n",
            "一夜一相留，何人见清风。\n",
            "山中不可问，日夕一相知。\n",
            "一叶无相见，春光自未成。\n",
            "不堪无事处，不是白云人。\n",
            "一日一声去，一年山月深。\n",
            "一年何所识，何计不相寻。\n",
            "一日无人远，春山在故乡。\n",
            "一年无限处，不得不成名。\n",
            "不得无人去，何人见故人。\n",
            "不知江海上，一处向江边。\n",
            "一日无归处，孤舟在北城。\n",
            "山中春色远，月落月中深。\n",
            "一夜山边寺，春来一夜声。\n",
            "何时无限路，一日不归山。\n",
            "不见南山路，相知有一年。\n",
            "江湖无不得，山水有无心。\n",
            "白发何时远，南江一径新。\n",
            "江山何处去，云上故乡来。\n",
            "不有青青外，何人不见身。\n",
            "山风一夜雨，云色入秋风。\n",
            "何处不知客，相逢何不知。\n",
            "一年多一醉，何事在青山。\n",
            "一日江城外，春山\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "VeuGTwPCmzMq",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 759
        },
        "outputId": "c0b82612-d429-4677-ffb3-f88b04f1cd95"
      },
      "cell_type": "code",
      "source": [
        "!cd Char-RNN-TensorFlow && python sample.py  --use_embedding --converter_path model/poetry/converter.pkl  --checkpoint_path model/poetry/ --max_length 300"
      ],
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:From /content/Char-RNN-TensorFlow/model.py:93: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\r\n",
            "Instructions for updating:\r\n",
            "\r\n",
            "Future major versions of TensorFlow will allow gradients to flow\r\n",
            "into the labels input on backprop by default.\r\n",
            "\r\n",
            "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\r\n",
            "\n",
            "2018-07-28 15:56:49.830940: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2018-07-28 15:56:49.831640: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \n",
            "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\n",
            "pciBusID: 0000:00:04.0\n",
            "totalMemory: 11.17GiB freeMemory: 11.10GiB\n",
            "2018-07-28 15:56:49.831708: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\n",
            "2018-07-28 15:56:50.376510: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2018-07-28 15:56:50.376649: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958]      0 \n",
            "2018-07-28 15:56:50.376680: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0:   N \n",
            "2018-07-28 15:56:50.377229: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10763 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)\n",
            "Restored from: model/poetry/model-10000\n",
            "换马去人人。\n",
            "一片不相逢，一年人亦归。\n",
            "不见江山去，相思有此时。\n",
            "江湖一日雨，春水一行人。\n",
            "白发秋风起，春风夜色深。\n",
            "一年无限路，何事是无穷。\n",
            "一日秋风起，相逢不可闻。\n",
            "不堪归路去，何事更无穷。\n",
            "日暮云山下，秋风月色中。\n",
            "一时何处见，不见旧城中。\n",
            "不得青山里，还知白发游。\n",
            "不知江海去，不得旧人归。\n",
            "日日山中水，秋风夜不知。\n",
            "一来无旧事，无事不同人。\n",
            "不见江南路，相思不见年。\n",
            "不知人未尽，何处有无穷。\n",
            "何必长山去，何如此日来。\n",
            "一年无限路，不觉有时情。\n",
            "白日秋光远，孤舟日月深。\n",
            "江山无一日，山月向江边。\n",
            "何处归山外，无人不得心。\n",
            "何人见归去，不得在人心。\n",
            "白发无时远，无时见一声。\n",
            "山中山下后，月月\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "7DjKjBf_mjB8",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 759
        },
        "outputId": "91082271-c1fc-4a4e-c1d9-354faf4b2144"
      },
      "cell_type": "code",
      "source": [
        "!cd Char-RNN-TensorFlow && python sample.py  --use_embedding --converter_path model/poetry/converter.pkl  --checkpoint_path model/poetry/ --max_length 300"
      ],
      "execution_count": 16,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:From /content/Char-RNN-TensorFlow/model.py:93: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\r\n",
            "Instructions for updating:\r\n",
            "\r\n",
            "Future major versions of TensorFlow will allow gradients to flow\r\n",
            "into the labels input on backprop by default.\r\n",
            "\r\n",
            "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\r\n",
            "\n",
            "2018-07-28 15:55:51.179846: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2018-07-28 15:55:51.180428: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \n",
            "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\n",
            "pciBusID: 0000:00:04.0\n",
            "totalMemory: 11.17GiB freeMemory: 11.10GiB\n",
            "2018-07-28 15:55:51.180482: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\n",
            "2018-07-28 15:55:51.659481: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2018-07-28 15:55:51.659602: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958]      0 \n",
            "2018-07-28 15:55:51.659647: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0:   N \n",
            "2018-07-28 15:55:51.660185: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10763 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)\n",
            "Restored from: model/poetry/model-10000\n",
            "巫山上。\n",
            "不见一时心，相思有相见。\n",
            "一日无一声，春山不可待。\n",
            "春风入云树，落日落秋色。\n",
            "不见江山月，不知无限客。\n",
            "山风入山月，山影一声发。\n",
            "春草有山月，孤舟不得人。\n",
            "一日秋草下，春山春草深。\n",
            "一时不得事，相见无时心。\n",
            "春风不可见，春雨不成天。\n",
            "何处有人事，不知江外来。\n",
            "一声春草里，白发白头间。\n",
            "不觉无心事，无心不可归。\n",
            "江边一夜雨，月落不成风。\n",
            "不得长生处，何人见此心。\n",
            "江中一千里，山下不相留。\n",
            "一夜无人去，何人是故人。\n",
            "江湖不得别，何处是归期。\n",
            "一日不得事，何年不可知。\n",
            "一回江汉水，不见故人稀。\n",
            "何处有人意，无人不可知。\n",
            "何时无此事，相见在南风。\n",
            "何处不可见，何人见此心。\n",
            "不知山上路，一夜更何\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "lmBy6gYvXNw2",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 67
        },
        "outputId": "3ac43e77-a396-4d41-d91f-7653d99b6cbc"
      },
      "cell_type": "code",
      "source": [
        "!curl https://gist.githubusercontent.com/utensil/93b65c93364059246b1321e3e0a6e0fe/raw/22e6ac2f03131fe4d78013873350a6febf91bcda/writings.txt > Char-RNN-TensorFlow/data/writings.txt"
      ],
      "execution_count": 11,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "  % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current\r\n",
            "                                 Dload  Upload   Total   Spent    Left  Speed\n",
            "100  358k  100  358k    0     0   358k      0  0:00:01 --:--:--  0:00:01 1683k\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "dTjxHFaWX0At",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 185
        },
        "outputId": "732abe60-4315-4a2b-887f-2ef8c3bf74fa"
      },
      "cell_type": "code",
      "source": [
        "!head Char-RNN-TensorFlow/data/writings.txt"
      ],
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "岩浆\r\n",
            "----\r\n",
            "\r\n",
            "时偿积欠缺填勉， 凝途失涩兴阑珊。\r\n",
            "\r\n",
            "奔流方遒黯如没， 趣中汲静思里安。\r\n",
            "\r\n",
            "泳夜\r\n",
            "----\r\n",
            "\r\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "9fL_SJUcX3g9",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 17173
        },
        "outputId": "1d5cac0f-8540-4df0-a83d-27326f270c50"
      },
      "cell_type": "code",
      "source": [
        "# Train the char-RNN on the corpus: 3-layer 256-unit LSTM with a 256-dim embedding, 10k steps.\n",
        "!cd Char-RNN-TensorFlow && python train.py --name writings --input_file data/writings.txt --use_embedding True --num_layers 3 --lstm_size 256 --embedding_size 256 --num_seqs 32 --num_steps 80 --learning_rate 0.005 --max_steps 10000"
      ],
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "3029\n",
            "3030\n",
            "WARNING:tensorflow:From /content/Char-RNN-TensorFlow/model.py:93: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\n",
            "Instructions for updating:\n",
            "\n",
            "Future major versions of TensorFlow will allow gradients to flow\n",
            "into the labels input on backprop by default.\n",
            "\n",
            "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\n",
            "\n",
            "2018-07-28 14:53:04.354325: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2018-07-28 14:53:04.355069: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \n",
            "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\n",
            "pciBusID: 0000:00:04.0\n",
            "totalMemory: 11.17GiB freeMemory: 11.10GiB\n",
            "2018-07-28 14:53:04.355153: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\n",
            "2018-07-28 14:53:04.899401: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2018-07-28 14:53:04.899489: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958]      0 \n",
            "2018-07-28 14:53:04.899521: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0:   N \n",
            "2018-07-28 14:53:04.899881: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10763 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)\n",
            "step: 10/10000...  loss: 6.5528...  0.3863 sec/batch\n",
            "step: 20/10000...  loss: 6.3371...  0.4065 sec/batch\n",
            "step: 30/10000...  loss: 6.2721...  0.3938 sec/batch\n",
            "step: 40/10000...  loss: 6.2588...  0.3733 sec/batch\n",
            "step: 50/10000...  loss: 6.2303...  0.3929 sec/batch\n",
            "step: 60/10000...  loss: 6.1394...  0.3654 sec/batch\n",
            "step: 70/10000...  loss: 6.2148...  0.3766 sec/batch\n",
            "step: 80/10000...  loss: 5.9511...  0.4000 sec/batch\n",
            "step: 90/10000...  loss: 5.8576...  0.4187 sec/batch\n",
            "step: 100/10000...  loss: 6.0163...  0.3892 sec/batch\n",
            "step: 110/10000...  loss: 5.8947...  0.3591 sec/batch\n",
            "step: 120/10000...  loss: 5.7140...  0.3653 sec/batch\n",
            "step: 130/10000...  loss: 5.6042...  0.3671 sec/batch\n",
            "step: 140/10000...  loss: 5.7144...  0.3840 sec/batch\n",
            "step: 150/10000...  loss: 5.6692...  0.3783 sec/batch\n",
            "step: 160/10000...  loss: 5.6157...  0.3726 sec/batch\n",
            "step: 170/10000...  loss: 5.6349...  0.3567 sec/batch\n",
            "step: 180/10000...  loss: 5.4433...  0.3522 sec/batch\n",
            "step: 190/10000...  loss: 5.5300...  0.3734 sec/batch\n",
            "step: 200/10000...  loss: 5.6150...  0.3990 sec/batch\n",
            "step: 210/10000...  loss: 5.5543...  0.4073 sec/batch\n",
            "step: 220/10000...  loss: 5.5812...  0.4080 sec/batch\n",
            "step: 230/10000...  loss: 5.4584...  0.3736 sec/batch\n",
            "step: 240/10000...  loss: 5.3706...  0.3523 sec/batch\n",
            "step: 250/10000...  loss: 5.4954...  0.3429 sec/batch\n",
            "step: 260/10000...  loss: 5.4528...  0.3674 sec/batch\n",
            "step: 270/10000...  loss: 5.2056...  0.3546 sec/batch\n",
            "step: 280/10000...  loss: 5.3320...  0.3517 sec/batch\n",
            "step: 290/10000...  loss: 5.2262...  0.3556 sec/batch\n",
            "step: 300/10000...  loss: 5.4526...  0.3542 sec/batch\n",
            "step: 310/10000...  loss: 5.2831...  0.3438 sec/batch\n",
            "step: 320/10000...  loss: 5.2162...  0.3576 sec/batch\n",
            "step: 330/10000...  loss: 5.3452...  0.3223 sec/batch\n",
            "step: 340/10000...  loss: 5.1004...  0.3483 sec/batch\n",
            "step: 350/10000...  loss: 5.0863...  0.3444 sec/batch\n",
            "step: 360/10000...  loss: 5.3384...  0.3447 sec/batch\n",
            "step: 370/10000...  loss: 5.2611...  0.3778 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 380/10000...  loss: 5.0635...  0.3699 sec/batch\n",
            "step: 390/10000...  loss: 4.9378...  0.3674 sec/batch\n",
            "step: 400/10000...  loss: 5.0683...  0.3618 sec/batch\n",
            "step: 410/10000...  loss: 5.0456...  0.3767 sec/batch\n",
            "step: 420/10000...  loss: 5.0048...  0.3545 sec/batch\n",
            "step: 430/10000...  loss: 5.0199...  0.3471 sec/batch\n",
            "step: 440/10000...  loss: 4.8375...  0.4050 sec/batch\n",
            "step: 450/10000...  loss: 5.0237...  0.3508 sec/batch\n",
            "step: 460/10000...  loss: 5.0744...  0.3514 sec/batch\n",
            "step: 470/10000...  loss: 5.0378...  0.3434 sec/batch\n",
            "step: 480/10000...  loss: 5.0815...  0.3651 sec/batch\n",
            "step: 490/10000...  loss: 4.9712...  0.4092 sec/batch\n",
            "step: 500/10000...  loss: 4.8911...  0.3411 sec/batch\n",
            "step: 510/10000...  loss: 5.0074...  0.3856 sec/batch\n",
            "step: 520/10000...  loss: 4.9737...  0.3690 sec/batch\n",
            "step: 530/10000...  loss: 4.7998...  0.3811 sec/batch\n",
            "step: 540/10000...  loss: 4.9263...  0.3966 sec/batch\n",
            "step: 550/10000...  loss: 4.8151...  0.3455 sec/batch\n",
            "step: 560/10000...  loss: 5.0504...  0.3417 sec/batch\n",
            "step: 570/10000...  loss: 4.8361...  0.3414 sec/batch\n",
            "step: 580/10000...  loss: 4.8241...  0.3572 sec/batch\n",
            "step: 590/10000...  loss: 4.9783...  0.3591 sec/batch\n",
            "step: 600/10000...  loss: 4.6912...  0.4087 sec/batch\n",
            "step: 610/10000...  loss: 4.7281...  0.3660 sec/batch\n",
            "step: 620/10000...  loss: 4.9890...  0.3504 sec/batch\n",
            "step: 630/10000...  loss: 4.8990...  0.3446 sec/batch\n",
            "step: 640/10000...  loss: 4.6740...  0.3450 sec/batch\n",
            "step: 650/10000...  loss: 4.5718...  0.4262 sec/batch\n",
            "step: 660/10000...  loss: 4.7221...  0.4059 sec/batch\n",
            "step: 670/10000...  loss: 4.7162...  0.3759 sec/batch\n",
            "step: 680/10000...  loss: 4.6300...  0.3825 sec/batch\n",
            "step: 690/10000...  loss: 4.6959...  0.3660 sec/batch\n",
            "step: 700/10000...  loss: 4.5142...  0.3580 sec/batch\n",
            "step: 710/10000...  loss: 4.7169...  0.3574 sec/batch\n",
            "step: 720/10000...  loss: 4.7674...  0.3721 sec/batch\n",
            "step: 730/10000...  loss: 4.7836...  0.3614 sec/batch\n",
            "step: 740/10000...  loss: 4.7416...  0.3548 sec/batch\n",
            "step: 750/10000...  loss: 4.6845...  0.3704 sec/batch\n",
            "step: 760/10000...  loss: 4.6068...  0.3599 sec/batch\n",
            "step: 770/10000...  loss: 4.7097...  0.3488 sec/batch\n",
            "step: 780/10000...  loss: 4.6885...  0.3772 sec/batch\n",
            "step: 790/10000...  loss: 4.5074...  0.3989 sec/batch\n",
            "step: 800/10000...  loss: 4.6378...  0.3447 sec/batch\n",
            "step: 810/10000...  loss: 4.5586...  0.3849 sec/batch\n",
            "step: 820/10000...  loss: 4.8078...  0.3857 sec/batch\n",
            "step: 830/10000...  loss: 4.5871...  0.3871 sec/batch\n",
            "step: 840/10000...  loss: 4.5512...  0.3737 sec/batch\n",
            "step: 850/10000...  loss: 4.6951...  0.3932 sec/batch\n",
            "step: 860/10000...  loss: 4.4475...  0.3406 sec/batch\n",
            "step: 870/10000...  loss: 4.4440...  0.3595 sec/batch\n",
            "step: 880/10000...  loss: 4.7116...  0.3667 sec/batch\n",
            "step: 890/10000...  loss: 4.6447...  0.3437 sec/batch\n",
            "step: 900/10000...  loss: 4.3886...  0.3644 sec/batch\n",
            "step: 910/10000...  loss: 4.3531...  0.3610 sec/batch\n",
            "step: 920/10000...  loss: 4.4721...  0.3507 sec/batch\n",
            "step: 930/10000...  loss: 4.4785...  0.3580 sec/batch\n",
            "step: 940/10000...  loss: 4.4021...  0.3512 sec/batch\n",
            "step: 950/10000...  loss: 4.4578...  0.3834 sec/batch\n",
            "step: 960/10000...  loss: 4.2982...  0.3569 sec/batch\n",
            "step: 970/10000...  loss: 4.5150...  0.3449 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 980/10000...  loss: 4.5141...  0.3851 sec/batch\n",
            "step: 990/10000...  loss: 4.5233...  0.3593 sec/batch\n",
            "step: 1000/10000...  loss: 4.5321...  0.3749 sec/batch\n",
            "step: 1010/10000...  loss: 4.4673...  0.3613 sec/batch\n",
            "step: 1020/10000...  loss: 4.3742...  0.3610 sec/batch\n",
            "step: 1030/10000...  loss: 4.5113...  0.3832 sec/batch\n",
            "step: 1040/10000...  loss: 4.4524...  0.3775 sec/batch\n",
            "step: 1050/10000...  loss: 4.2954...  0.3630 sec/batch\n",
            "step: 1060/10000...  loss: 4.3932...  0.3803 sec/batch\n",
            "step: 1070/10000...  loss: 4.3451...  0.3670 sec/batch\n",
            "step: 1080/10000...  loss: 4.5815...  0.3486 sec/batch\n",
            "step: 1090/10000...  loss: 4.3942...  0.3544 sec/batch\n",
            "step: 1100/10000...  loss: 4.3556...  0.3893 sec/batch\n",
            "step: 1110/10000...  loss: 4.4578...  0.3783 sec/batch\n",
            "step: 1120/10000...  loss: 4.2363...  0.3645 sec/batch\n",
            "step: 1130/10000...  loss: 4.2632...  0.3890 sec/batch\n",
            "step: 1140/10000...  loss: 4.5337...  0.3744 sec/batch\n",
            "step: 1150/10000...  loss: 4.4940...  0.3550 sec/batch\n",
            "step: 1160/10000...  loss: 4.1857...  0.3578 sec/batch\n",
            "step: 1170/10000...  loss: 4.1688...  0.3524 sec/batch\n",
            "step: 1180/10000...  loss: 4.2638...  0.3484 sec/batch\n",
            "step: 1190/10000...  loss: 4.2818...  0.3596 sec/batch\n",
            "step: 1200/10000...  loss: 4.2639...  0.3866 sec/batch\n",
            "step: 1210/10000...  loss: 4.2737...  0.3808 sec/batch\n",
            "step: 1220/10000...  loss: 4.1124...  0.3531 sec/batch\n",
            "step: 1230/10000...  loss: 4.2834...  0.2994 sec/batch\n",
            "step: 1240/10000...  loss: 4.3472...  0.3191 sec/batch\n",
            "step: 1250/10000...  loss: 4.3594...  0.3069 sec/batch\n",
            "step: 1260/10000...  loss: 4.3163...  0.3274 sec/batch\n",
            "step: 1270/10000...  loss: 4.2708...  0.3082 sec/batch\n",
            "step: 1280/10000...  loss: 4.2379...  0.3267 sec/batch\n",
            "step: 1290/10000...  loss: 4.3383...  0.3055 sec/batch\n",
            "step: 1300/10000...  loss: 4.3164...  0.3140 sec/batch\n",
            "step: 1310/10000...  loss: 4.1048...  0.3072 sec/batch\n",
            "step: 1320/10000...  loss: 4.2452...  0.3665 sec/batch\n",
            "step: 1330/10000...  loss: 4.1630...  0.3748 sec/batch\n",
            "step: 1340/10000...  loss: 4.4254...  0.3985 sec/batch\n",
            "step: 1350/10000...  loss: 4.2276...  0.3249 sec/batch\n",
            "step: 1360/10000...  loss: 4.1796...  0.3792 sec/batch\n",
            "step: 1370/10000...  loss: 4.3057...  0.3513 sec/batch\n",
            "step: 1380/10000...  loss: 4.1040...  0.3429 sec/batch\n",
            "step: 1390/10000...  loss: 4.0917...  0.3592 sec/batch\n",
            "step: 1400/10000...  loss: 4.3592...  0.3524 sec/batch\n",
            "step: 1410/10000...  loss: 4.3507...  0.3661 sec/batch\n",
            "step: 1420/10000...  loss: 4.0915...  0.3592 sec/batch\n",
            "step: 1430/10000...  loss: 4.0017...  0.3162 sec/batch\n",
            "step: 1440/10000...  loss: 4.1431...  0.2986 sec/batch\n",
            "step: 1450/10000...  loss: 4.1451...  0.2981 sec/batch\n",
            "step: 1460/10000...  loss: 4.1202...  0.3097 sec/batch\n",
            "step: 1470/10000...  loss: 4.1212...  0.3341 sec/batch\n",
            "step: 1480/10000...  loss: 3.9958...  0.3650 sec/batch\n",
            "step: 1490/10000...  loss: 4.1306...  0.3810 sec/batch\n",
            "step: 1500/10000...  loss: 4.2299...  0.3022 sec/batch\n",
            "step: 1510/10000...  loss: 4.2129...  0.3089 sec/batch\n",
            "step: 1520/10000...  loss: 4.2145...  0.3019 sec/batch\n",
            "step: 1530/10000...  loss: 4.1110...  0.3047 sec/batch\n",
            "step: 1540/10000...  loss: 4.1038...  0.3801 sec/batch\n",
            "step: 1550/10000...  loss: 4.1992...  0.3027 sec/batch\n",
            "step: 1560/10000...  loss: 4.2211...  0.3100 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 1570/10000...  loss: 4.0023...  0.3500 sec/batch\n",
            "step: 1580/10000...  loss: 4.1531...  0.3571 sec/batch\n",
            "step: 1590/10000...  loss: 4.0515...  0.3010 sec/batch\n",
            "step: 1600/10000...  loss: 4.2309...  0.3576 sec/batch\n",
            "step: 1610/10000...  loss: 4.0714...  0.3775 sec/batch\n",
            "step: 1620/10000...  loss: 4.0997...  0.3609 sec/batch\n",
            "step: 1630/10000...  loss: 4.1630...  0.3474 sec/batch\n",
            "step: 1640/10000...  loss: 3.9600...  0.3997 sec/batch\n",
            "step: 1650/10000...  loss: 3.9851...  0.4445 sec/batch\n",
            "step: 1660/10000...  loss: 4.2264...  0.3753 sec/batch\n",
            "step: 1670/10000...  loss: 4.1864...  0.4448 sec/batch\n",
            "step: 1680/10000...  loss: 3.9220...  0.4677 sec/batch\n",
            "step: 1690/10000...  loss: 3.9114...  0.3652 sec/batch\n",
            "step: 1700/10000...  loss: 4.0577...  0.2944 sec/batch\n",
            "step: 1710/10000...  loss: 4.0330...  0.3053 sec/batch\n",
            "step: 1720/10000...  loss: 3.9829...  0.3588 sec/batch\n",
            "step: 1730/10000...  loss: 4.0232...  0.4638 sec/batch\n",
            "step: 1740/10000...  loss: 3.8819...  0.3460 sec/batch\n",
            "step: 1750/10000...  loss: 4.0287...  0.3735 sec/batch\n",
            "step: 1760/10000...  loss: 4.0872...  0.3019 sec/batch\n",
            "step: 1770/10000...  loss: 4.1200...  0.3023 sec/batch\n",
            "step: 1780/10000...  loss: 4.0248...  0.3892 sec/batch\n",
            "step: 1790/10000...  loss: 4.0382...  0.3671 sec/batch\n",
            "step: 1800/10000...  loss: 3.9977...  0.3755 sec/batch\n",
            "step: 1810/10000...  loss: 4.1087...  0.4055 sec/batch\n",
            "step: 1820/10000...  loss: 4.0827...  0.3945 sec/batch\n",
            "step: 1830/10000...  loss: 3.9056...  0.3792 sec/batch\n",
            "step: 1840/10000...  loss: 4.0186...  0.2970 sec/batch\n",
            "step: 1850/10000...  loss: 3.9244...  0.3239 sec/batch\n",
            "step: 1860/10000...  loss: 4.1570...  0.3023 sec/batch\n",
            "step: 1870/10000...  loss: 4.0071...  0.3489 sec/batch\n",
            "step: 1880/10000...  loss: 3.9597...  0.3634 sec/batch\n",
            "step: 1890/10000...  loss: 4.0406...  0.4127 sec/batch\n",
            "step: 1900/10000...  loss: 3.8083...  0.3980 sec/batch\n",
            "step: 1910/10000...  loss: 3.9085...  0.3053 sec/batch\n",
            "step: 1920/10000...  loss: 4.1244...  0.3241 sec/batch\n",
            "step: 1930/10000...  loss: 4.0733...  0.3163 sec/batch\n",
            "step: 1940/10000...  loss: 3.7996...  0.3626 sec/batch\n",
            "step: 1950/10000...  loss: 3.8524...  0.3672 sec/batch\n",
            "step: 1960/10000...  loss: 3.9197...  0.3642 sec/batch\n",
            "step: 1970/10000...  loss: 3.9330...  0.3020 sec/batch\n",
            "step: 1980/10000...  loss: 3.8782...  0.2979 sec/batch\n",
            "step: 1990/10000...  loss: 3.9142...  0.3494 sec/batch\n",
            "step: 2000/10000...  loss: 3.7746...  0.3740 sec/batch\n",
            "step: 2010/10000...  loss: 3.8967...  0.3657 sec/batch\n",
            "step: 2020/10000...  loss: 4.0239...  0.3583 sec/batch\n",
            "step: 2030/10000...  loss: 4.0038...  0.3126 sec/batch\n",
            "step: 2040/10000...  loss: 3.9852...  0.3175 sec/batch\n",
            "step: 2050/10000...  loss: 3.9709...  0.2990 sec/batch\n",
            "step: 2060/10000...  loss: 3.9172...  0.3599 sec/batch\n",
            "step: 2070/10000...  loss: 3.9883...  0.3431 sec/batch\n",
            "step: 2080/10000...  loss: 3.9866...  0.4040 sec/batch\n",
            "step: 2090/10000...  loss: 3.8017...  0.3658 sec/batch\n",
            "step: 2100/10000...  loss: 3.9495...  0.3763 sec/batch\n",
            "step: 2110/10000...  loss: 3.8285...  0.4149 sec/batch\n",
            "step: 2120/10000...  loss: 4.0369...  0.3167 sec/batch\n",
            "step: 2130/10000...  loss: 3.8738...  0.3618 sec/batch\n",
            "step: 2140/10000...  loss: 3.8720...  0.3908 sec/batch\n",
            "step: 2150/10000...  loss: 3.9392...  0.3616 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 2160/10000...  loss: 3.7432...  0.3813 sec/batch\n",
            "step: 2170/10000...  loss: 3.7732...  0.3444 sec/batch\n",
            "step: 2180/10000...  loss: 4.0171...  0.3105 sec/batch\n",
            "step: 2190/10000...  loss: 4.0009...  0.3106 sec/batch\n",
            "step: 2200/10000...  loss: 3.7382...  0.3026 sec/batch\n",
            "step: 2210/10000...  loss: 3.7511...  0.3011 sec/batch\n",
            "step: 2220/10000...  loss: 3.8172...  0.3090 sec/batch\n",
            "step: 2230/10000...  loss: 3.8597...  0.3025 sec/batch\n",
            "step: 2240/10000...  loss: 3.8610...  0.3651 sec/batch\n",
            "step: 2250/10000...  loss: 3.8791...  0.3555 sec/batch\n",
            "step: 2260/10000...  loss: 3.7018...  0.3596 sec/batch\n",
            "step: 2270/10000...  loss: 3.8272...  0.3518 sec/batch\n",
            "step: 2280/10000...  loss: 3.9058...  0.3429 sec/batch\n",
            "step: 2290/10000...  loss: 3.9462...  0.3417 sec/batch\n",
            "step: 2300/10000...  loss: 3.8936...  0.3608 sec/batch\n",
            "step: 2310/10000...  loss: 3.8373...  0.3620 sec/batch\n",
            "step: 2320/10000...  loss: 3.8111...  0.3495 sec/batch\n",
            "step: 2330/10000...  loss: 3.9381...  0.4245 sec/batch\n",
            "step: 2340/10000...  loss: 3.9296...  0.3944 sec/batch\n",
            "step: 2350/10000...  loss: 3.7179...  0.3590 sec/batch\n",
            "step: 2360/10000...  loss: 3.8697...  0.3692 sec/batch\n",
            "step: 2370/10000...  loss: 3.7543...  0.3104 sec/batch\n",
            "step: 2380/10000...  loss: 3.9665...  0.3199 sec/batch\n",
            "step: 2390/10000...  loss: 3.8271...  0.3485 sec/batch\n",
            "step: 2400/10000...  loss: 3.8107...  0.3647 sec/batch\n",
            "step: 2410/10000...  loss: 3.8799...  0.3515 sec/batch\n",
            "step: 2420/10000...  loss: 3.6902...  0.4303 sec/batch\n",
            "step: 2430/10000...  loss: 3.7210...  0.3517 sec/batch\n",
            "step: 2440/10000...  loss: 3.9415...  0.3511 sec/batch\n",
            "step: 2450/10000...  loss: 3.9181...  0.3927 sec/batch\n",
            "step: 2460/10000...  loss: 3.6478...  0.3632 sec/batch\n",
            "step: 2470/10000...  loss: 3.6882...  0.4018 sec/batch\n",
            "step: 2480/10000...  loss: 3.7603...  0.4120 sec/batch\n",
            "step: 2490/10000...  loss: 3.7769...  0.3046 sec/batch\n",
            "step: 2500/10000...  loss: 3.7848...  0.3137 sec/batch\n",
            "step: 2510/10000...  loss: 3.7864...  0.3058 sec/batch\n",
            "step: 2520/10000...  loss: 3.6097...  0.3269 sec/batch\n",
            "step: 2530/10000...  loss: 3.7772...  0.3768 sec/batch\n",
            "step: 2540/10000...  loss: 3.8392...  0.3748 sec/batch\n",
            "step: 2550/10000...  loss: 3.8417...  0.3802 sec/batch\n",
            "step: 2560/10000...  loss: 3.8051...  0.3468 sec/batch\n",
            "step: 2570/10000...  loss: 3.7969...  0.3525 sec/batch\n",
            "step: 2580/10000...  loss: 3.7454...  0.3514 sec/batch\n",
            "step: 2590/10000...  loss: 3.8706...  0.3702 sec/batch\n",
            "step: 2600/10000...  loss: 3.8572...  0.3506 sec/batch\n",
            "step: 2610/10000...  loss: 3.6365...  0.3446 sec/batch\n",
            "step: 2620/10000...  loss: 3.7588...  0.3580 sec/batch\n",
            "step: 2630/10000...  loss: 3.7348...  0.3849 sec/batch\n",
            "step: 2640/10000...  loss: 3.9317...  0.3721 sec/batch\n",
            "step: 2650/10000...  loss: 3.7590...  0.3727 sec/batch\n",
            "step: 2660/10000...  loss: 3.8117...  0.3596 sec/batch\n",
            "step: 2670/10000...  loss: 3.8086...  0.3677 sec/batch\n",
            "step: 2680/10000...  loss: 3.6724...  0.3515 sec/batch\n",
            "step: 2690/10000...  loss: 3.6460...  0.4057 sec/batch\n",
            "step: 2700/10000...  loss: 3.8833...  0.3526 sec/batch\n",
            "step: 2710/10000...  loss: 3.8245...  0.3446 sec/batch\n",
            "step: 2720/10000...  loss: 3.6061...  0.3611 sec/batch\n",
            "step: 2730/10000...  loss: 3.6781...  0.3548 sec/batch\n",
            "step: 2740/10000...  loss: 3.6996...  0.3743 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 2750/10000...  loss: 3.7530...  0.3413 sec/batch\n",
            "step: 2760/10000...  loss: 3.7125...  0.3415 sec/batch\n",
            "step: 2770/10000...  loss: 3.7317...  0.3670 sec/batch\n",
            "step: 2780/10000...  loss: 3.5952...  0.3744 sec/batch\n",
            "step: 2790/10000...  loss: 3.7537...  0.3698 sec/batch\n",
            "step: 2800/10000...  loss: 3.7948...  0.3163 sec/batch\n",
            "step: 2810/10000...  loss: 3.8052...  0.2987 sec/batch\n",
            "step: 2820/10000...  loss: 3.7550...  0.3219 sec/batch\n",
            "step: 2830/10000...  loss: 3.7676...  0.3730 sec/batch\n",
            "step: 2840/10000...  loss: 3.7447...  0.3630 sec/batch\n",
            "step: 2850/10000...  loss: 3.8129...  0.3616 sec/batch\n",
            "step: 2860/10000...  loss: 3.7836...  0.3631 sec/batch\n",
            "step: 2870/10000...  loss: 3.5740...  0.3651 sec/batch\n",
            "step: 2880/10000...  loss: 3.7347...  0.3873 sec/batch\n",
            "step: 2890/10000...  loss: 3.6798...  0.3841 sec/batch\n",
            "step: 2900/10000...  loss: 3.8112...  0.3618 sec/batch\n",
            "step: 2910/10000...  loss: 3.7194...  0.3025 sec/batch\n",
            "step: 2920/10000...  loss: 3.7442...  0.3297 sec/batch\n",
            "step: 2930/10000...  loss: 3.7698...  0.3030 sec/batch\n",
            "step: 2940/10000...  loss: 3.6009...  0.2960 sec/batch\n",
            "step: 2950/10000...  loss: 3.6209...  0.3088 sec/batch\n",
            "step: 2960/10000...  loss: 3.8759...  0.3034 sec/batch\n",
            "step: 2970/10000...  loss: 3.8500...  0.3003 sec/batch\n",
            "step: 2980/10000...  loss: 3.5834...  0.3080 sec/batch\n",
            "step: 2990/10000...  loss: 3.6128...  0.2998 sec/batch\n",
            "step: 3000/10000...  loss: 3.6238...  0.3115 sec/batch\n",
            "step: 3010/10000...  loss: 3.7016...  0.3056 sec/batch\n",
            "step: 3020/10000...  loss: 3.6371...  0.3120 sec/batch\n",
            "step: 3030/10000...  loss: 3.6463...  0.3084 sec/batch\n",
            "step: 3040/10000...  loss: 3.5223...  0.3268 sec/batch\n",
            "step: 3050/10000...  loss: 3.6954...  0.3158 sec/batch\n",
            "step: 3060/10000...  loss: 3.7269...  0.3677 sec/batch\n",
            "step: 3070/10000...  loss: 3.7544...  0.3957 sec/batch\n",
            "step: 3080/10000...  loss: 3.6913...  0.3562 sec/batch\n",
            "step: 3090/10000...  loss: 3.6944...  0.3611 sec/batch\n",
            "step: 3100/10000...  loss: 3.6625...  0.3739 sec/batch\n",
            "step: 3110/10000...  loss: 3.6878...  0.3721 sec/batch\n",
            "step: 3120/10000...  loss: 3.7645...  0.3676 sec/batch\n",
            "step: 3130/10000...  loss: 3.5811...  0.3167 sec/batch\n",
            "step: 3140/10000...  loss: 3.6605...  0.3290 sec/batch\n",
            "step: 3150/10000...  loss: 3.6328...  0.3049 sec/batch\n",
            "step: 3160/10000...  loss: 3.7845...  0.3226 sec/batch\n",
            "step: 3170/10000...  loss: 3.6172...  0.3335 sec/batch\n",
            "step: 3180/10000...  loss: 3.6378...  0.3194 sec/batch\n",
            "step: 3190/10000...  loss: 3.7242...  0.3170 sec/batch\n",
            "step: 3200/10000...  loss: 3.5458...  0.3236 sec/batch\n",
            "step: 3210/10000...  loss: 3.5956...  0.3755 sec/batch\n",
            "step: 3220/10000...  loss: 3.8011...  0.4445 sec/batch\n",
            "step: 3230/10000...  loss: 3.7654...  0.3869 sec/batch\n",
            "step: 3240/10000...  loss: 3.5079...  0.3623 sec/batch\n",
            "step: 3250/10000...  loss: 3.5770...  0.3602 sec/batch\n",
            "step: 3260/10000...  loss: 3.5869...  0.4226 sec/batch\n",
            "step: 3270/10000...  loss: 3.6543...  0.3938 sec/batch\n",
            "step: 3280/10000...  loss: 3.6195...  0.3843 sec/batch\n",
            "step: 3290/10000...  loss: 3.6389...  0.4103 sec/batch\n",
            "step: 3300/10000...  loss: 3.4999...  0.3164 sec/batch\n",
            "step: 3310/10000...  loss: 3.6357...  0.3079 sec/batch\n",
            "step: 3320/10000...  loss: 3.7465...  0.3631 sec/batch\n",
            "step: 3330/10000...  loss: 3.6985...  0.3751 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 3340/10000...  loss: 3.6175...  0.3818 sec/batch\n",
            "step: 3350/10000...  loss: 3.6448...  0.3775 sec/batch\n",
            "step: 3360/10000...  loss: 3.6015...  0.3670 sec/batch\n",
            "step: 3370/10000...  loss: 3.6929...  0.4190 sec/batch\n",
            "step: 3380/10000...  loss: 3.7085...  0.3807 sec/batch\n",
            "step: 3390/10000...  loss: 3.5021...  0.4095 sec/batch\n",
            "step: 3400/10000...  loss: 3.6344...  0.4150 sec/batch\n",
            "step: 3410/10000...  loss: 3.5670...  0.3901 sec/batch\n",
            "step: 3420/10000...  loss: 3.7539...  0.4536 sec/batch\n",
            "step: 3430/10000...  loss: 3.6039...  0.3896 sec/batch\n",
            "step: 3440/10000...  loss: 3.6279...  0.3755 sec/batch\n",
            "step: 3450/10000...  loss: 3.6362...  0.3023 sec/batch\n",
            "step: 3460/10000...  loss: 3.4979...  0.3629 sec/batch\n",
            "step: 3470/10000...  loss: 3.5314...  0.4299 sec/batch\n",
            "step: 3480/10000...  loss: 3.7321...  0.3211 sec/batch\n",
            "step: 3490/10000...  loss: 3.7412...  0.3244 sec/batch\n",
            "step: 3500/10000...  loss: 3.4947...  0.3683 sec/batch\n",
            "step: 3510/10000...  loss: 3.5141...  0.3716 sec/batch\n",
            "step: 3520/10000...  loss: 3.5364...  0.3516 sec/batch\n",
            "step: 3530/10000...  loss: 3.6140...  0.3702 sec/batch\n",
            "step: 3540/10000...  loss: 3.5687...  0.3544 sec/batch\n",
            "step: 3550/10000...  loss: 3.6102...  0.3406 sec/batch\n",
            "step: 3560/10000...  loss: 3.4233...  0.3541 sec/batch\n",
            "step: 3570/10000...  loss: 3.5657...  0.3505 sec/batch\n",
            "step: 3580/10000...  loss: 3.6225...  0.3439 sec/batch\n",
            "step: 3590/10000...  loss: 3.6751...  0.3588 sec/batch\n",
            "step: 3600/10000...  loss: 3.6017...  0.3544 sec/batch\n",
            "step: 3610/10000...  loss: 3.5809...  0.3294 sec/batch\n",
            "step: 3620/10000...  loss: 3.5652...  0.3613 sec/batch\n",
            "step: 3630/10000...  loss: 3.6398...  0.3501 sec/batch\n",
            "step: 3640/10000...  loss: 3.6923...  0.3552 sec/batch\n",
            "step: 3650/10000...  loss: 3.4996...  0.3670 sec/batch\n",
            "step: 3660/10000...  loss: 3.6042...  0.3622 sec/batch\n",
            "step: 3670/10000...  loss: 3.5446...  0.3551 sec/batch\n",
            "step: 3680/10000...  loss: 3.7389...  0.3762 sec/batch\n",
            "step: 3690/10000...  loss: 3.5655...  0.3931 sec/batch\n",
            "step: 3700/10000...  loss: 3.5517...  0.3599 sec/batch\n",
            "step: 3710/10000...  loss: 3.6009...  0.4015 sec/batch\n",
            "step: 3720/10000...  loss: 3.4731...  0.3581 sec/batch\n",
            "step: 3730/10000...  loss: 3.4758...  0.3589 sec/batch\n",
            "step: 3740/10000...  loss: 3.7071...  0.3726 sec/batch\n",
            "step: 3750/10000...  loss: 3.6836...  0.3412 sec/batch\n",
            "step: 3760/10000...  loss: 3.4186...  0.3729 sec/batch\n",
            "step: 3770/10000...  loss: 3.4621...  0.3618 sec/batch\n",
            "step: 3780/10000...  loss: 3.5233...  0.3525 sec/batch\n",
            "step: 3790/10000...  loss: 3.5858...  0.4365 sec/batch\n",
            "step: 3800/10000...  loss: 3.5581...  0.3597 sec/batch\n",
            "step: 3810/10000...  loss: 3.5676...  0.3830 sec/batch\n",
            "step: 3820/10000...  loss: 3.3749...  0.3622 sec/batch\n",
            "step: 3830/10000...  loss: 3.5425...  0.3511 sec/batch\n",
            "step: 3840/10000...  loss: 3.6027...  0.3641 sec/batch\n",
            "step: 3850/10000...  loss: 3.5863...  0.3921 sec/batch\n",
            "step: 3860/10000...  loss: 3.5658...  0.3528 sec/batch\n",
            "step: 3870/10000...  loss: 3.5505...  0.3658 sec/batch\n",
            "step: 3880/10000...  loss: 3.5048...  0.3923 sec/batch\n",
            "step: 3890/10000...  loss: 3.6073...  0.3446 sec/batch\n",
            "step: 3900/10000...  loss: 3.6494...  0.3653 sec/batch\n",
            "step: 3910/10000...  loss: 3.4420...  0.4002 sec/batch\n",
            "step: 3920/10000...  loss: 3.5909...  0.4273 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 3930/10000...  loss: 3.4784...  0.3736 sec/batch\n",
            "step: 3940/10000...  loss: 3.6948...  0.3885 sec/batch\n",
            "step: 3950/10000...  loss: 3.5120...  0.4365 sec/batch\n",
            "step: 3960/10000...  loss: 3.5157...  0.3664 sec/batch\n",
            "step: 3970/10000...  loss: 3.5811...  0.3903 sec/batch\n",
            "step: 3980/10000...  loss: 3.4422...  0.3553 sec/batch\n",
            "step: 3990/10000...  loss: 3.4418...  0.3996 sec/batch\n",
            "step: 4000/10000...  loss: 3.6450...  0.3645 sec/batch\n",
            "step: 4010/10000...  loss: 3.6575...  0.3892 sec/batch\n",
            "step: 4020/10000...  loss: 3.4203...  0.3856 sec/batch\n",
            "step: 4030/10000...  loss: 3.4925...  0.3488 sec/batch\n",
            "step: 4040/10000...  loss: 3.4452...  0.3670 sec/batch\n",
            "step: 4050/10000...  loss: 3.5409...  0.4199 sec/batch\n",
            "step: 4060/10000...  loss: 3.5473...  0.3705 sec/batch\n",
            "step: 4070/10000...  loss: 3.5583...  0.4388 sec/batch\n",
            "step: 4080/10000...  loss: 3.3464...  0.3652 sec/batch\n",
            "step: 4090/10000...  loss: 3.5060...  0.3868 sec/batch\n",
            "step: 4100/10000...  loss: 3.5980...  0.4088 sec/batch\n",
            "step: 4110/10000...  loss: 3.6017...  0.3943 sec/batch\n",
            "step: 4120/10000...  loss: 3.5265...  0.4389 sec/batch\n",
            "step: 4130/10000...  loss: 3.5541...  0.3706 sec/batch\n",
            "step: 4140/10000...  loss: 3.4836...  0.4084 sec/batch\n",
            "step: 4150/10000...  loss: 3.5499...  0.3914 sec/batch\n",
            "step: 4160/10000...  loss: 3.6449...  0.3601 sec/batch\n",
            "step: 4170/10000...  loss: 3.4058...  0.3920 sec/batch\n",
            "step: 4180/10000...  loss: 3.5509...  0.3675 sec/batch\n",
            "step: 4190/10000...  loss: 3.4596...  0.3673 sec/batch\n",
            "step: 4200/10000...  loss: 3.6134...  0.3607 sec/batch\n",
            "step: 4210/10000...  loss: 3.4812...  0.3882 sec/batch\n",
            "step: 4220/10000...  loss: 3.4674...  0.3668 sec/batch\n",
            "step: 4230/10000...  loss: 3.5625...  0.4063 sec/batch\n",
            "step: 4240/10000...  loss: 3.4558...  0.3764 sec/batch\n",
            "step: 4250/10000...  loss: 3.4148...  0.3829 sec/batch\n",
            "step: 4260/10000...  loss: 3.6311...  0.4135 sec/batch\n",
            "step: 4270/10000...  loss: 3.6153...  0.3798 sec/batch\n",
            "step: 4280/10000...  loss: 3.3791...  0.3695 sec/batch\n",
            "step: 4290/10000...  loss: 3.4043...  0.3923 sec/batch\n",
            "step: 4300/10000...  loss: 3.4486...  0.3474 sec/batch\n",
            "step: 4310/10000...  loss: 3.5095...  0.3989 sec/batch\n",
            "step: 4320/10000...  loss: 3.4933...  0.3395 sec/batch\n",
            "step: 4330/10000...  loss: 3.4983...  0.3052 sec/batch\n",
            "step: 4340/10000...  loss: 3.3339...  0.3242 sec/batch\n",
            "step: 4350/10000...  loss: 3.4939...  0.3662 sec/batch\n",
            "step: 4360/10000...  loss: 3.5618...  0.3782 sec/batch\n",
            "step: 4370/10000...  loss: 3.5442...  0.3500 sec/batch\n",
            "step: 4380/10000...  loss: 3.4714...  0.3583 sec/batch\n",
            "step: 4390/10000...  loss: 3.4945...  0.3964 sec/batch\n",
            "step: 4400/10000...  loss: 3.5040...  0.3774 sec/batch\n",
            "step: 4410/10000...  loss: 3.5228...  0.3872 sec/batch\n",
            "step: 4420/10000...  loss: 3.6105...  0.3688 sec/batch\n",
            "step: 4430/10000...  loss: 3.4255...  0.3729 sec/batch\n",
            "step: 4440/10000...  loss: 3.5174...  0.4281 sec/batch\n",
            "step: 4450/10000...  loss: 3.4510...  0.3880 sec/batch\n",
            "step: 4460/10000...  loss: 3.6152...  0.3897 sec/batch\n",
            "step: 4470/10000...  loss: 3.4354...  0.3757 sec/batch\n",
            "step: 4480/10000...  loss: 3.4908...  0.3257 sec/batch\n",
            "step: 4490/10000...  loss: 3.4943...  0.3138 sec/batch\n",
            "step: 4500/10000...  loss: 3.3717...  0.3198 sec/batch\n",
            "step: 4510/10000...  loss: 3.3864...  0.2946 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 4520/10000...  loss: 3.5899...  0.3189 sec/batch\n",
            "step: 4530/10000...  loss: 3.6222...  0.3917 sec/batch\n",
            "step: 4540/10000...  loss: 3.3392...  0.3573 sec/batch\n",
            "step: 4550/10000...  loss: 3.4117...  0.3127 sec/batch\n",
            "step: 4560/10000...  loss: 3.4366...  0.3660 sec/batch\n",
            "step: 4570/10000...  loss: 3.4827...  0.3556 sec/batch\n",
            "step: 4580/10000...  loss: 3.4514...  0.3864 sec/batch\n",
            "step: 4590/10000...  loss: 3.4926...  0.3834 sec/batch\n",
            "step: 4600/10000...  loss: 3.3017...  0.3839 sec/batch\n",
            "step: 4610/10000...  loss: 3.4654...  0.3662 sec/batch\n",
            "step: 4620/10000...  loss: 3.5270...  0.3977 sec/batch\n",
            "step: 4630/10000...  loss: 3.5356...  0.3466 sec/batch\n",
            "step: 4640/10000...  loss: 3.4969...  0.4300 sec/batch\n",
            "step: 4650/10000...  loss: 3.4679...  0.2964 sec/batch\n",
            "step: 4660/10000...  loss: 3.4611...  0.3296 sec/batch\n",
            "step: 4670/10000...  loss: 3.4977...  0.3145 sec/batch\n",
            "step: 4680/10000...  loss: 3.5319...  0.3183 sec/batch\n",
            "step: 4690/10000...  loss: 3.3274...  0.3256 sec/batch\n",
            "step: 4700/10000...  loss: 3.4967...  0.3439 sec/batch\n",
            "step: 4710/10000...  loss: 3.4207...  0.3054 sec/batch\n",
            "step: 4720/10000...  loss: 3.6008...  0.3277 sec/batch\n",
            "step: 4730/10000...  loss: 3.4199...  0.3712 sec/batch\n",
            "step: 4740/10000...  loss: 3.4551...  0.3763 sec/batch\n",
            "step: 4750/10000...  loss: 3.5120...  0.3972 sec/batch\n",
            "step: 4760/10000...  loss: 3.3721...  0.3955 sec/batch\n",
            "step: 4770/10000...  loss: 3.4041...  0.4017 sec/batch\n",
            "step: 4780/10000...  loss: 3.5847...  0.3850 sec/batch\n",
            "step: 4790/10000...  loss: 3.5421...  0.3794 sec/batch\n",
            "step: 4800/10000...  loss: 3.3377...  0.3737 sec/batch\n",
            "step: 4810/10000...  loss: 3.3812...  0.3782 sec/batch\n",
            "step: 4820/10000...  loss: 3.4376...  0.3791 sec/batch\n",
            "step: 4830/10000...  loss: 3.5027...  0.3189 sec/batch\n",
            "step: 4840/10000...  loss: 3.4538...  0.3023 sec/batch\n",
            "step: 4850/10000...  loss: 3.4184...  0.3348 sec/batch\n",
            "step: 4860/10000...  loss: 3.2584...  0.3110 sec/batch\n",
            "step: 4870/10000...  loss: 3.4880...  0.3195 sec/batch\n",
            "step: 4880/10000...  loss: 3.5125...  0.3105 sec/batch\n",
            "step: 4890/10000...  loss: 3.5457...  0.3758 sec/batch\n",
            "step: 4900/10000...  loss: 3.4830...  0.4058 sec/batch\n",
            "step: 4910/10000...  loss: 3.4547...  0.3711 sec/batch\n",
            "step: 4920/10000...  loss: 3.4324...  0.3788 sec/batch\n",
            "step: 4930/10000...  loss: 3.4775...  0.3927 sec/batch\n",
            "step: 4940/10000...  loss: 3.5052...  0.3912 sec/batch\n",
            "step: 4950/10000...  loss: 3.3535...  0.3704 sec/batch\n",
            "step: 4960/10000...  loss: 3.4230...  0.3875 sec/batch\n",
            "step: 4970/10000...  loss: 3.3973...  0.4310 sec/batch\n",
            "step: 4980/10000...  loss: 3.5837...  0.3125 sec/batch\n",
            "step: 4990/10000...  loss: 3.4003...  0.3081 sec/batch\n",
            "step: 5000/10000...  loss: 3.4108...  0.4092 sec/batch\n",
            "step: 5010/10000...  loss: 3.4400...  0.3835 sec/batch\n",
            "step: 5020/10000...  loss: 3.3382...  0.3487 sec/batch\n",
            "step: 5030/10000...  loss: 3.3305...  0.3862 sec/batch\n",
            "step: 5040/10000...  loss: 3.5593...  0.3383 sec/batch\n",
            "step: 5050/10000...  loss: 3.5198...  0.3558 sec/batch\n",
            "step: 5060/10000...  loss: 3.3355...  0.3444 sec/batch\n",
            "step: 5070/10000...  loss: 3.3671...  0.3571 sec/batch\n",
            "step: 5080/10000...  loss: 3.3856...  0.3578 sec/batch\n",
            "step: 5090/10000...  loss: 3.4277...  0.4110 sec/batch\n",
            "step: 5100/10000...  loss: 3.4212...  0.4329 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 5110/10000...  loss: 3.3859...  0.3715 sec/batch\n",
            "step: 5120/10000...  loss: 3.2650...  0.3706 sec/batch\n",
            "step: 5130/10000...  loss: 3.4748...  0.3229 sec/batch\n",
            "step: 5140/10000...  loss: 3.4633...  0.3215 sec/batch\n",
            "step: 5150/10000...  loss: 3.5263...  0.3067 sec/batch\n",
            "step: 5160/10000...  loss: 3.4294...  0.3112 sec/batch\n",
            "step: 5170/10000...  loss: 3.3648...  0.3053 sec/batch\n",
            "step: 5180/10000...  loss: 3.4044...  0.3225 sec/batch\n",
            "step: 5190/10000...  loss: 3.4552...  0.3101 sec/batch\n",
            "step: 5200/10000...  loss: 3.4801...  0.3283 sec/batch\n",
            "step: 5210/10000...  loss: 3.2786...  0.3182 sec/batch\n",
            "step: 5220/10000...  loss: 3.4250...  0.3253 sec/batch\n",
            "step: 5230/10000...  loss: 3.3599...  0.3119 sec/batch\n",
            "step: 5240/10000...  loss: 3.5392...  0.3324 sec/batch\n",
            "step: 5250/10000...  loss: 3.4000...  0.3680 sec/batch\n",
            "step: 5260/10000...  loss: 3.4063...  0.3915 sec/batch\n",
            "step: 5270/10000...  loss: 3.4498...  0.3764 sec/batch\n",
            "step: 5280/10000...  loss: 3.3663...  0.3701 sec/batch\n",
            "step: 5290/10000...  loss: 3.3090...  0.4116 sec/batch\n",
            "step: 5300/10000...  loss: 3.5301...  0.4324 sec/batch\n",
            "step: 5310/10000...  loss: 3.4767...  0.3846 sec/batch\n",
            "step: 5320/10000...  loss: 3.3157...  0.4349 sec/batch\n",
            "step: 5330/10000...  loss: 3.3302...  0.3677 sec/batch\n",
            "step: 5340/10000...  loss: 3.3771...  0.3572 sec/batch\n",
            "step: 5350/10000...  loss: 3.4148...  0.4051 sec/batch\n",
            "step: 5360/10000...  loss: 3.4040...  0.3563 sec/batch\n",
            "step: 5370/10000...  loss: 3.3973...  0.3951 sec/batch\n",
            "step: 5380/10000...  loss: 3.2471...  0.3549 sec/batch\n",
            "step: 5390/10000...  loss: 3.4461...  0.3459 sec/batch\n",
            "step: 5400/10000...  loss: 3.4701...  0.3427 sec/batch\n",
            "step: 5410/10000...  loss: 3.4626...  0.3663 sec/batch\n",
            "step: 5420/10000...  loss: 3.4472...  0.3126 sec/batch\n",
            "step: 5430/10000...  loss: 3.3845...  0.3856 sec/batch\n",
            "step: 5440/10000...  loss: 3.3408...  0.4253 sec/batch\n",
            "step: 5450/10000...  loss: 3.4366...  0.4196 sec/batch\n",
            "step: 5460/10000...  loss: 3.4623...  0.4029 sec/batch\n",
            "step: 5470/10000...  loss: 3.2996...  0.3884 sec/batch\n",
            "step: 5480/10000...  loss: 3.3516...  0.3840 sec/batch\n",
            "step: 5490/10000...  loss: 3.3840...  0.3780 sec/batch\n",
            "step: 5500/10000...  loss: 3.4830...  0.3930 sec/batch\n",
            "step: 5510/10000...  loss: 3.3992...  0.3928 sec/batch\n",
            "step: 5520/10000...  loss: 3.3723...  0.3844 sec/batch\n",
            "step: 5530/10000...  loss: 3.4530...  0.3582 sec/batch\n",
            "step: 5540/10000...  loss: 3.2945...  0.3500 sec/batch\n",
            "step: 5550/10000...  loss: 3.2469...  0.4040 sec/batch\n",
            "step: 5560/10000...  loss: 3.4846...  0.3912 sec/batch\n",
            "step: 5570/10000...  loss: 3.4699...  0.3838 sec/batch\n",
            "step: 5580/10000...  loss: 3.2403...  0.4273 sec/batch\n",
            "step: 5590/10000...  loss: 3.2730...  0.4352 sec/batch\n",
            "step: 5600/10000...  loss: 3.3511...  0.3581 sec/batch\n",
            "step: 5610/10000...  loss: 3.3692...  0.3985 sec/batch\n",
            "step: 5620/10000...  loss: 3.3898...  0.3772 sec/batch\n",
            "step: 5630/10000...  loss: 3.4274...  0.3458 sec/batch\n",
            "step: 5640/10000...  loss: 3.2729...  0.3627 sec/batch\n",
            "step: 5650/10000...  loss: 3.4467...  0.4096 sec/batch\n",
            "step: 5660/10000...  loss: 3.4666...  0.4255 sec/batch\n",
            "step: 5670/10000...  loss: 3.4247...  0.3962 sec/batch\n",
            "step: 5680/10000...  loss: 3.4026...  0.3790 sec/batch\n",
            "step: 5690/10000...  loss: 3.3936...  0.3995 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 5700/10000...  loss: 3.3496...  0.3813 sec/batch\n",
            "step: 5710/10000...  loss: 3.4115...  0.3683 sec/batch\n",
            "step: 5720/10000...  loss: 3.4696...  0.4006 sec/batch\n",
            "step: 5730/10000...  loss: 3.2536...  0.3981 sec/batch\n",
            "step: 5740/10000...  loss: 3.3771...  0.3488 sec/batch\n",
            "step: 5750/10000...  loss: 3.3523...  0.3918 sec/batch\n",
            "step: 5760/10000...  loss: 3.4490...  0.3487 sec/batch\n",
            "step: 5770/10000...  loss: 3.3559...  0.4229 sec/batch\n",
            "step: 5780/10000...  loss: 3.3325...  0.3629 sec/batch\n",
            "step: 5790/10000...  loss: 3.3738...  0.4115 sec/batch\n",
            "step: 5800/10000...  loss: 3.2481...  0.4140 sec/batch\n",
            "step: 5810/10000...  loss: 3.2923...  0.3702 sec/batch\n",
            "step: 5820/10000...  loss: 3.4949...  0.3566 sec/batch\n",
            "step: 5830/10000...  loss: 3.4618...  0.3504 sec/batch\n",
            "step: 5840/10000...  loss: 3.2457...  0.4090 sec/batch\n",
            "step: 5850/10000...  loss: 3.2862...  0.3475 sec/batch\n",
            "step: 5860/10000...  loss: 3.3000...  0.3543 sec/batch\n",
            "step: 5870/10000...  loss: 3.3617...  0.3503 sec/batch\n",
            "step: 5880/10000...  loss: 3.3439...  0.3924 sec/batch\n",
            "step: 5890/10000...  loss: 3.3554...  0.3460 sec/batch\n",
            "step: 5900/10000...  loss: 3.2021...  0.3680 sec/batch\n",
            "step: 5910/10000...  loss: 3.3534...  0.3841 sec/batch\n",
            "step: 5920/10000...  loss: 3.4368...  0.3605 sec/batch\n",
            "step: 5930/10000...  loss: 3.4410...  0.3740 sec/batch\n",
            "step: 5940/10000...  loss: 3.3300...  0.3518 sec/batch\n",
            "step: 5950/10000...  loss: 3.3520...  0.3815 sec/batch\n",
            "step: 5960/10000...  loss: 3.3190...  0.3546 sec/batch\n",
            "step: 5970/10000...  loss: 3.3669...  0.3721 sec/batch\n",
            "step: 5980/10000...  loss: 3.4609...  0.3432 sec/batch\n",
            "step: 5990/10000...  loss: 3.2475...  0.3620 sec/batch\n",
            "step: 6000/10000...  loss: 3.3769...  0.3821 sec/batch\n",
            "step: 6010/10000...  loss: 3.3321...  0.3659 sec/batch\n",
            "step: 6020/10000...  loss: 3.4747...  0.4085 sec/batch\n",
            "step: 6030/10000...  loss: 3.3160...  0.3791 sec/batch\n",
            "step: 6040/10000...  loss: 3.3289...  0.3743 sec/batch\n",
            "step: 6050/10000...  loss: 3.3784...  0.4019 sec/batch\n",
            "step: 6060/10000...  loss: 3.2490...  0.3849 sec/batch\n",
            "step: 6070/10000...  loss: 3.2661...  0.3927 sec/batch\n",
            "step: 6080/10000...  loss: 3.4885...  0.3965 sec/batch\n",
            "step: 6090/10000...  loss: 3.4254...  0.3717 sec/batch\n",
            "step: 6100/10000...  loss: 3.2344...  0.3030 sec/batch\n",
            "step: 6110/10000...  loss: 3.3007...  0.3348 sec/batch\n",
            "step: 6120/10000...  loss: 3.2880...  0.3160 sec/batch\n",
            "step: 6130/10000...  loss: 3.3619...  0.3381 sec/batch\n",
            "step: 6140/10000...  loss: 3.3051...  0.3053 sec/batch\n",
            "step: 6150/10000...  loss: 3.3559...  0.3236 sec/batch\n",
            "step: 6160/10000...  loss: 3.2439...  0.3215 sec/batch\n",
            "step: 6170/10000...  loss: 3.3956...  0.3138 sec/batch\n",
            "step: 6180/10000...  loss: 3.4223...  0.3147 sec/batch\n",
            "step: 6190/10000...  loss: 3.3937...  0.3109 sec/batch\n",
            "step: 6200/10000...  loss: 3.3077...  0.3306 sec/batch\n",
            "step: 6210/10000...  loss: 3.3813...  0.3105 sec/batch\n",
            "step: 6220/10000...  loss: 3.3416...  0.3291 sec/batch\n",
            "step: 6230/10000...  loss: 3.3803...  0.3147 sec/batch\n",
            "step: 6240/10000...  loss: 3.4179...  0.3120 sec/batch\n",
            "step: 6250/10000...  loss: 3.2255...  0.3067 sec/batch\n",
            "step: 6260/10000...  loss: 3.3499...  0.3209 sec/batch\n",
            "step: 6270/10000...  loss: 3.3163...  0.3269 sec/batch\n",
            "step: 6280/10000...  loss: 3.4701...  0.3271 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 6290/10000...  loss: 3.3207...  0.3111 sec/batch\n",
            "step: 6300/10000...  loss: 3.3710...  0.3133 sec/batch\n",
            "step: 6310/10000...  loss: 3.3750...  0.3347 sec/batch\n",
            "step: 6320/10000...  loss: 3.2542...  0.3344 sec/batch\n",
            "step: 6330/10000...  loss: 3.2631...  0.3064 sec/batch\n",
            "step: 6340/10000...  loss: 3.4645...  0.3112 sec/batch\n",
            "step: 6350/10000...  loss: 3.4444...  0.3231 sec/batch\n",
            "step: 6360/10000...  loss: 3.2248...  0.3036 sec/batch\n",
            "step: 6370/10000...  loss: 3.2514...  0.3341 sec/batch\n",
            "step: 6380/10000...  loss: 3.2590...  0.3176 sec/batch\n",
            "step: 6390/10000...  loss: 3.3279...  0.3506 sec/batch\n",
            "step: 6400/10000...  loss: 3.2932...  0.3108 sec/batch\n",
            "step: 6410/10000...  loss: 3.3311...  0.3137 sec/batch\n",
            "step: 6420/10000...  loss: 3.1643...  0.3289 sec/batch\n",
            "step: 6430/10000...  loss: 3.3686...  0.3065 sec/batch\n",
            "step: 6440/10000...  loss: 3.3933...  0.3287 sec/batch\n",
            "step: 6450/10000...  loss: 3.4045...  0.3112 sec/batch\n",
            "step: 6460/10000...  loss: 3.3084...  0.3138 sec/batch\n",
            "step: 6470/10000...  loss: 3.3150...  0.3139 sec/batch\n",
            "step: 6480/10000...  loss: 3.3649...  0.3088 sec/batch\n",
            "step: 6490/10000...  loss: 3.3379...  0.3523 sec/batch\n",
            "step: 6500/10000...  loss: 3.4286...  0.3279 sec/batch\n",
            "step: 6510/10000...  loss: 3.2447...  0.3506 sec/batch\n",
            "step: 6520/10000...  loss: 3.3421...  0.3250 sec/batch\n",
            "step: 6530/10000...  loss: 3.3450...  0.3053 sec/batch\n",
            "step: 6540/10000...  loss: 3.4325...  0.3180 sec/batch\n",
            "step: 6550/10000...  loss: 3.3024...  0.3528 sec/batch\n",
            "step: 6560/10000...  loss: 3.3715...  0.3284 sec/batch\n",
            "step: 6570/10000...  loss: 3.3495...  0.3932 sec/batch\n",
            "step: 6580/10000...  loss: 3.2122...  0.3251 sec/batch\n",
            "step: 6590/10000...  loss: 3.2157...  0.3207 sec/batch\n",
            "step: 6600/10000...  loss: 3.4787...  0.3354 sec/batch\n",
            "step: 6610/10000...  loss: 3.4142...  0.3439 sec/batch\n",
            "step: 6620/10000...  loss: 3.1633...  0.3128 sec/batch\n",
            "step: 6630/10000...  loss: 3.2525...  0.3254 sec/batch\n",
            "step: 6640/10000...  loss: 3.2499...  0.3260 sec/batch\n",
            "step: 6650/10000...  loss: 3.3293...  0.3192 sec/batch\n",
            "step: 6660/10000...  loss: 3.3072...  0.3318 sec/batch\n",
            "step: 6670/10000...  loss: 3.2907...  0.3594 sec/batch\n",
            "step: 6680/10000...  loss: 3.1923...  0.3311 sec/batch\n",
            "step: 6690/10000...  loss: 3.3300...  0.3062 sec/batch\n",
            "step: 6700/10000...  loss: 3.4043...  0.3304 sec/batch\n",
            "step: 6710/10000...  loss: 3.3489...  0.3185 sec/batch\n",
            "step: 6720/10000...  loss: 3.3200...  0.3196 sec/batch\n",
            "step: 6730/10000...  loss: 3.3080...  0.3190 sec/batch\n",
            "step: 6740/10000...  loss: 3.3215...  0.3059 sec/batch\n",
            "step: 6750/10000...  loss: 3.3977...  0.3515 sec/batch\n",
            "step: 6760/10000...  loss: 3.3721...  0.3285 sec/batch\n",
            "step: 6770/10000...  loss: 3.1774...  0.3173 sec/batch\n",
            "step: 6780/10000...  loss: 3.3492...  0.3293 sec/batch\n",
            "step: 6790/10000...  loss: 3.3130...  0.3115 sec/batch\n",
            "step: 6800/10000...  loss: 3.4374...  0.3130 sec/batch\n",
            "step: 6810/10000...  loss: 3.2965...  0.3416 sec/batch\n",
            "step: 6820/10000...  loss: 3.3148...  0.4105 sec/batch\n",
            "step: 6830/10000...  loss: 3.3264...  0.3053 sec/batch\n",
            "step: 6840/10000...  loss: 3.2276...  0.3404 sec/batch\n",
            "step: 6850/10000...  loss: 3.2099...  0.3183 sec/batch\n",
            "step: 6860/10000...  loss: 3.4336...  0.3408 sec/batch\n",
            "step: 6870/10000...  loss: 3.4153...  0.3371 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 6880/10000...  loss: 3.1623...  0.3056 sec/batch\n",
            "step: 6890/10000...  loss: 3.2241...  0.3350 sec/batch\n",
            "step: 6900/10000...  loss: 3.2443...  0.3234 sec/batch\n",
            "step: 6910/10000...  loss: 3.3349...  0.3216 sec/batch\n",
            "step: 6920/10000...  loss: 3.2614...  0.3445 sec/batch\n",
            "step: 6930/10000...  loss: 3.2930...  0.3090 sec/batch\n",
            "step: 6940/10000...  loss: 3.1615...  0.3176 sec/batch\n",
            "step: 6950/10000...  loss: 3.3637...  0.3174 sec/batch\n",
            "step: 6960/10000...  loss: 3.3576...  0.3260 sec/batch\n",
            "step: 6970/10000...  loss: 3.3851...  0.3234 sec/batch\n",
            "step: 6980/10000...  loss: 3.3164...  0.3032 sec/batch\n",
            "step: 6990/10000...  loss: 3.3072...  0.3066 sec/batch\n",
            "step: 7000/10000...  loss: 3.2948...  0.3185 sec/batch\n",
            "step: 7010/10000...  loss: 3.3539...  0.4196 sec/batch\n",
            "step: 7020/10000...  loss: 3.4111...  0.3143 sec/batch\n",
            "step: 7030/10000...  loss: 3.1734...  0.3238 sec/batch\n",
            "step: 7040/10000...  loss: 3.3039...  0.3120 sec/batch\n",
            "step: 7050/10000...  loss: 3.2881...  0.3145 sec/batch\n",
            "step: 7060/10000...  loss: 3.4290...  0.3494 sec/batch\n",
            "step: 7070/10000...  loss: 3.2524...  0.2973 sec/batch\n",
            "step: 7080/10000...  loss: 3.2703...  0.3203 sec/batch\n",
            "step: 7090/10000...  loss: 3.3012...  0.3318 sec/batch\n",
            "step: 7100/10000...  loss: 3.2307...  0.3107 sec/batch\n",
            "step: 7110/10000...  loss: 3.1877...  0.3396 sec/batch\n",
            "step: 7120/10000...  loss: 3.4140...  0.3262 sec/batch\n",
            "step: 7130/10000...  loss: 3.3763...  0.3139 sec/batch\n",
            "step: 7140/10000...  loss: 3.1230...  0.3152 sec/batch\n",
            "step: 7150/10000...  loss: 3.2333...  0.3522 sec/batch\n",
            "step: 7160/10000...  loss: 3.2506...  0.3646 sec/batch\n",
            "step: 7170/10000...  loss: 3.3220...  0.3139 sec/batch\n",
            "step: 7180/10000...  loss: 3.2422...  0.3068 sec/batch\n",
            "step: 7190/10000...  loss: 3.2480...  0.3207 sec/batch\n",
            "step: 7200/10000...  loss: 3.1786...  0.3233 sec/batch\n",
            "step: 7210/10000...  loss: 3.3310...  0.3121 sec/batch\n",
            "step: 7220/10000...  loss: 3.3874...  0.3278 sec/batch\n",
            "step: 7230/10000...  loss: 3.3027...  0.3179 sec/batch\n",
            "step: 7240/10000...  loss: 3.3132...  0.3427 sec/batch\n",
            "step: 7250/10000...  loss: 3.2664...  0.3417 sec/batch\n",
            "step: 7260/10000...  loss: 3.2775...  0.3235 sec/batch\n",
            "step: 7270/10000...  loss: 3.3208...  0.3332 sec/batch\n",
            "step: 7280/10000...  loss: 3.4209...  0.3155 sec/batch\n",
            "step: 7290/10000...  loss: 3.0895...  0.3215 sec/batch\n",
            "step: 7300/10000...  loss: 3.3185...  0.3534 sec/batch\n",
            "step: 7310/10000...  loss: 3.2558...  0.3065 sec/batch\n",
            "step: 7320/10000...  loss: 3.4093...  0.3238 sec/batch\n",
            "step: 7330/10000...  loss: 3.2516...  0.3306 sec/batch\n",
            "step: 7340/10000...  loss: 3.2992...  0.3298 sec/batch\n",
            "step: 7350/10000...  loss: 3.3422...  0.3072 sec/batch\n",
            "step: 7360/10000...  loss: 3.2042...  0.3171 sec/batch\n",
            "step: 7370/10000...  loss: 3.1816...  0.3154 sec/batch\n",
            "step: 7380/10000...  loss: 3.3904...  0.3123 sec/batch\n",
            "step: 7390/10000...  loss: 3.3974...  0.3200 sec/batch\n",
            "step: 7400/10000...  loss: 3.1356...  0.3213 sec/batch\n",
            "step: 7410/10000...  loss: 3.2551...  0.3380 sec/batch\n",
            "step: 7420/10000...  loss: 3.2382...  0.3283 sec/batch\n",
            "step: 7430/10000...  loss: 3.2855...  0.3237 sec/batch\n",
            "step: 7440/10000...  loss: 3.2191...  0.3399 sec/batch\n",
            "step: 7450/10000...  loss: 3.2972...  0.3390 sec/batch\n",
            "step: 7460/10000...  loss: 3.1575...  0.3200 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 7470/10000...  loss: 3.2988...  0.3181 sec/batch\n",
            "step: 7480/10000...  loss: 3.3620...  0.3208 sec/batch\n",
            "step: 7490/10000...  loss: 3.3343...  0.3128 sec/batch\n",
            "step: 7500/10000...  loss: 3.2574...  0.3214 sec/batch\n",
            "step: 7510/10000...  loss: 3.2677...  0.3276 sec/batch\n",
            "step: 7520/10000...  loss: 3.2838...  0.3097 sec/batch\n",
            "step: 7530/10000...  loss: 3.3416...  0.3070 sec/batch\n",
            "step: 7540/10000...  loss: 3.3776...  0.3113 sec/batch\n",
            "step: 7550/10000...  loss: 3.1506...  0.3052 sec/batch\n",
            "step: 7560/10000...  loss: 3.2498...  0.3318 sec/batch\n",
            "step: 7570/10000...  loss: 3.2316...  0.3221 sec/batch\n",
            "step: 7580/10000...  loss: 3.3557...  0.3145 sec/batch\n",
            "step: 7590/10000...  loss: 3.2678...  0.3033 sec/batch\n",
            "step: 7600/10000...  loss: 3.2692...  0.3143 sec/batch\n",
            "step: 7610/10000...  loss: 3.2650...  0.3183 sec/batch\n",
            "step: 7620/10000...  loss: 3.2266...  0.3047 sec/batch\n",
            "step: 7630/10000...  loss: 3.1280...  0.3354 sec/batch\n",
            "step: 7640/10000...  loss: 3.3648...  0.3009 sec/batch\n",
            "step: 7650/10000...  loss: 3.3885...  0.3375 sec/batch\n",
            "step: 7660/10000...  loss: 3.1392...  0.3232 sec/batch\n",
            "step: 7670/10000...  loss: 3.1840...  0.3070 sec/batch\n",
            "step: 7680/10000...  loss: 3.1947...  0.3134 sec/batch\n",
            "step: 7690/10000...  loss: 3.2949...  0.3236 sec/batch\n",
            "step: 7700/10000...  loss: 3.2633...  0.3716 sec/batch\n",
            "step: 7710/10000...  loss: 3.2874...  0.3330 sec/batch\n",
            "step: 7720/10000...  loss: 3.1269...  0.3123 sec/batch\n",
            "step: 7730/10000...  loss: 3.2974...  0.3159 sec/batch\n",
            "step: 7740/10000...  loss: 3.3255...  0.3126 sec/batch\n",
            "step: 7750/10000...  loss: 3.3594...  0.3117 sec/batch\n",
            "step: 7760/10000...  loss: 3.2584...  0.3169 sec/batch\n",
            "step: 7770/10000...  loss: 3.2482...  0.3210 sec/batch\n",
            "step: 7780/10000...  loss: 3.2560...  0.3386 sec/batch\n",
            "step: 7790/10000...  loss: 3.3025...  0.3127 sec/batch\n",
            "step: 7800/10000...  loss: 3.3511...  0.3091 sec/batch\n",
            "step: 7810/10000...  loss: 3.1368...  0.3243 sec/batch\n",
            "step: 7820/10000...  loss: 3.2819...  0.3078 sec/batch\n",
            "step: 7830/10000...  loss: 3.2117...  0.3363 sec/batch\n",
            "step: 7840/10000...  loss: 3.3885...  0.3629 sec/batch\n",
            "step: 7850/10000...  loss: 3.1925...  0.3302 sec/batch\n",
            "step: 7860/10000...  loss: 3.2918...  0.3187 sec/batch\n",
            "step: 7870/10000...  loss: 3.2930...  0.3029 sec/batch\n",
            "step: 7880/10000...  loss: 3.1710...  0.3149 sec/batch\n",
            "step: 7890/10000...  loss: 3.1881...  0.3157 sec/batch\n",
            "step: 7900/10000...  loss: 3.3762...  0.3466 sec/batch\n",
            "step: 7910/10000...  loss: 3.3545...  0.3503 sec/batch\n",
            "step: 7920/10000...  loss: 3.1478...  0.3505 sec/batch\n",
            "step: 7930/10000...  loss: 3.2087...  0.3150 sec/batch\n",
            "step: 7940/10000...  loss: 3.1928...  0.3506 sec/batch\n",
            "step: 7950/10000...  loss: 3.3092...  0.3103 sec/batch\n",
            "step: 7960/10000...  loss: 3.2053...  0.3158 sec/batch\n",
            "step: 7970/10000...  loss: 3.2184...  0.3281 sec/batch\n",
            "step: 7980/10000...  loss: 3.1255...  0.3679 sec/batch\n",
            "step: 7990/10000...  loss: 3.2897...  0.3040 sec/batch\n",
            "step: 8000/10000...  loss: 3.3263...  0.3155 sec/batch\n",
            "step: 8010/10000...  loss: 3.3062...  0.3593 sec/batch\n",
            "step: 8020/10000...  loss: 3.2555...  0.3240 sec/batch\n",
            "step: 8030/10000...  loss: 3.2169...  0.3037 sec/batch\n",
            "step: 8040/10000...  loss: 3.2185...  0.3152 sec/batch\n",
            "step: 8050/10000...  loss: 3.3121...  0.3085 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 8060/10000...  loss: 3.3904...  0.3239 sec/batch\n",
            "step: 8070/10000...  loss: 3.1385...  0.3365 sec/batch\n",
            "step: 8080/10000...  loss: 3.3019...  0.3539 sec/batch\n",
            "step: 8090/10000...  loss: 3.2182...  0.3623 sec/batch\n",
            "step: 8100/10000...  loss: 3.3644...  0.3144 sec/batch\n",
            "step: 8110/10000...  loss: 3.2205...  0.3284 sec/batch\n",
            "step: 8120/10000...  loss: 3.2473...  0.3095 sec/batch\n",
            "step: 8130/10000...  loss: 3.3031...  0.3243 sec/batch\n",
            "step: 8140/10000...  loss: 3.1995...  0.3195 sec/batch\n",
            "step: 8150/10000...  loss: 3.1387...  0.3412 sec/batch\n",
            "step: 8160/10000...  loss: 3.3365...  0.3195 sec/batch\n",
            "step: 8170/10000...  loss: 3.3399...  0.3286 sec/batch\n",
            "step: 8180/10000...  loss: 3.1500...  0.3577 sec/batch\n",
            "step: 8190/10000...  loss: 3.1968...  0.3006 sec/batch\n",
            "step: 8200/10000...  loss: 3.1907...  0.3115 sec/batch\n",
            "step: 8210/10000...  loss: 3.2628...  0.3181 sec/batch\n",
            "step: 8220/10000...  loss: 3.2180...  0.3045 sec/batch\n",
            "step: 8230/10000...  loss: 3.2431...  0.3318 sec/batch\n",
            "step: 8240/10000...  loss: 3.0974...  0.3462 sec/batch\n",
            "step: 8250/10000...  loss: 3.2945...  0.3203 sec/batch\n",
            "step: 8260/10000...  loss: 3.2867...  0.3567 sec/batch\n",
            "step: 8270/10000...  loss: 3.2817...  0.3076 sec/batch\n",
            "step: 8280/10000...  loss: 3.2499...  0.3079 sec/batch\n",
            "step: 8290/10000...  loss: 3.2344...  0.3708 sec/batch\n",
            "step: 8300/10000...  loss: 3.2364...  0.3089 sec/batch\n",
            "step: 8310/10000...  loss: 3.2509...  0.3232 sec/batch\n",
            "step: 8320/10000...  loss: 3.3110...  0.3160 sec/batch\n",
            "step: 8330/10000...  loss: 3.1442...  0.3091 sec/batch\n",
            "step: 8340/10000...  loss: 3.2928...  0.3310 sec/batch\n",
            "step: 8350/10000...  loss: 3.1794...  0.3647 sec/batch\n",
            "step: 8360/10000...  loss: 3.3666...  0.4161 sec/batch\n",
            "step: 8370/10000...  loss: 3.2324...  0.3220 sec/batch\n",
            "step: 8380/10000...  loss: 3.2170...  0.3330 sec/batch\n",
            "step: 8390/10000...  loss: 3.2769...  0.3128 sec/batch\n",
            "step: 8400/10000...  loss: 3.1591...  0.3092 sec/batch\n",
            "step: 8410/10000...  loss: 3.1293...  0.3091 sec/batch\n",
            "step: 8420/10000...  loss: 3.3152...  0.3335 sec/batch\n",
            "step: 8430/10000...  loss: 3.3480...  0.3927 sec/batch\n",
            "step: 8440/10000...  loss: 3.0834...  0.4002 sec/batch\n",
            "step: 8450/10000...  loss: 3.2049...  0.3251 sec/batch\n",
            "step: 8460/10000...  loss: 3.1784...  0.3294 sec/batch\n",
            "step: 8470/10000...  loss: 3.2458...  0.3164 sec/batch\n",
            "step: 8480/10000...  loss: 3.2333...  0.3315 sec/batch\n",
            "step: 8490/10000...  loss: 3.2118...  0.3200 sec/batch\n",
            "step: 8500/10000...  loss: 3.0838...  0.3049 sec/batch\n",
            "step: 8510/10000...  loss: 3.2669...  0.3615 sec/batch\n",
            "step: 8520/10000...  loss: 3.2896...  0.3804 sec/batch\n",
            "step: 8530/10000...  loss: 3.2551...  0.3274 sec/batch\n",
            "step: 8540/10000...  loss: 3.2157...  0.3148 sec/batch\n",
            "step: 8550/10000...  loss: 3.2087...  0.3640 sec/batch\n",
            "step: 8560/10000...  loss: 3.2282...  0.3081 sec/batch\n",
            "step: 8570/10000...  loss: 3.2541...  0.3384 sec/batch\n",
            "step: 8580/10000...  loss: 3.2606...  0.4182 sec/batch\n",
            "step: 8590/10000...  loss: 3.1056...  0.3084 sec/batch\n",
            "step: 8600/10000...  loss: 3.2377...  0.4165 sec/batch\n",
            "step: 8610/10000...  loss: 3.2136...  0.3227 sec/batch\n",
            "step: 8620/10000...  loss: 3.3749...  0.3076 sec/batch\n",
            "step: 8630/10000...  loss: 3.1993...  0.3176 sec/batch\n",
            "step: 8640/10000...  loss: 3.2638...  0.3352 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 8650/10000...  loss: 3.2294...  0.3578 sec/batch\n",
            "step: 8660/10000...  loss: 3.1542...  0.3055 sec/batch\n",
            "step: 8670/10000...  loss: 3.1177...  0.3082 sec/batch\n",
            "step: 8680/10000...  loss: 3.3315...  0.3306 sec/batch\n",
            "step: 8690/10000...  loss: 3.3303...  0.3340 sec/batch\n",
            "step: 8700/10000...  loss: 3.0779...  0.3048 sec/batch\n",
            "step: 8710/10000...  loss: 3.1721...  0.3125 sec/batch\n",
            "step: 8720/10000...  loss: 3.1968...  0.3133 sec/batch\n",
            "step: 8730/10000...  loss: 3.2540...  0.3075 sec/batch\n",
            "step: 8740/10000...  loss: 3.2391...  0.3262 sec/batch\n",
            "step: 8750/10000...  loss: 3.2135...  0.3191 sec/batch\n",
            "step: 8760/10000...  loss: 3.0627...  0.3260 sec/batch\n",
            "step: 8770/10000...  loss: 3.2197...  0.3059 sec/batch\n",
            "step: 8780/10000...  loss: 3.2424...  0.3762 sec/batch\n",
            "step: 8790/10000...  loss: 3.2671...  0.3259 sec/batch\n",
            "step: 8800/10000...  loss: 3.2056...  0.3234 sec/batch\n",
            "step: 8810/10000...  loss: 3.2270...  0.3070 sec/batch\n",
            "step: 8820/10000...  loss: 3.2309...  0.3220 sec/batch\n",
            "step: 8830/10000...  loss: 3.2549...  0.3322 sec/batch\n",
            "step: 8840/10000...  loss: 3.2991...  0.3184 sec/batch\n",
            "step: 8850/10000...  loss: 3.1412...  0.3274 sec/batch\n",
            "step: 8860/10000...  loss: 3.2178...  0.3722 sec/batch\n",
            "step: 8870/10000...  loss: 3.1530...  0.3162 sec/batch\n",
            "step: 8880/10000...  loss: 3.3398...  0.3231 sec/batch\n",
            "step: 8890/10000...  loss: 3.2101...  0.3702 sec/batch\n",
            "step: 8900/10000...  loss: 3.2393...  0.4616 sec/batch\n",
            "step: 8910/10000...  loss: 3.2250...  0.3629 sec/batch\n",
            "step: 8920/10000...  loss: 3.1390...  0.3224 sec/batch\n",
            "step: 8930/10000...  loss: 3.0451...  0.4069 sec/batch\n",
            "step: 8940/10000...  loss: 3.2857...  0.3391 sec/batch\n",
            "step: 8950/10000...  loss: 3.3494...  0.3126 sec/batch\n",
            "step: 8960/10000...  loss: 3.0726...  0.3015 sec/batch\n",
            "step: 8970/10000...  loss: 3.1308...  0.3245 sec/batch\n",
            "step: 8980/10000...  loss: 3.1821...  0.3063 sec/batch\n",
            "step: 8990/10000...  loss: 3.2460...  0.4287 sec/batch\n",
            "step: 9000/10000...  loss: 3.2032...  0.3020 sec/batch\n",
            "step: 9010/10000...  loss: 3.1954...  0.3316 sec/batch\n",
            "step: 9020/10000...  loss: 3.0546...  0.3135 sec/batch\n",
            "step: 9030/10000...  loss: 3.2273...  0.3923 sec/batch\n",
            "step: 9040/10000...  loss: 3.2736...  0.3033 sec/batch\n",
            "step: 9050/10000...  loss: 3.2285...  0.3443 sec/batch\n",
            "step: 9060/10000...  loss: 3.1928...  0.3186 sec/batch\n",
            "step: 9070/10000...  loss: 3.2211...  0.3363 sec/batch\n",
            "step: 9080/10000...  loss: 3.1986...  0.3015 sec/batch\n",
            "step: 9090/10000...  loss: 3.2688...  0.3588 sec/batch\n",
            "step: 9100/10000...  loss: 3.2571...  0.3823 sec/batch\n",
            "step: 9110/10000...  loss: 3.1084...  0.3762 sec/batch\n",
            "step: 9120/10000...  loss: 3.2348...  0.3612 sec/batch\n",
            "step: 9130/10000...  loss: 3.1911...  0.3721 sec/batch\n",
            "step: 9140/10000...  loss: 3.3045...  0.3695 sec/batch\n",
            "step: 9150/10000...  loss: 3.1692...  0.4018 sec/batch\n",
            "step: 9160/10000...  loss: 3.1913...  0.3192 sec/batch\n",
            "step: 9170/10000...  loss: 3.1722...  0.3437 sec/batch\n",
            "step: 9180/10000...  loss: 3.1678...  0.3619 sec/batch\n",
            "step: 9190/10000...  loss: 3.0893...  0.3371 sec/batch\n",
            "step: 9200/10000...  loss: 3.3129...  0.3595 sec/batch\n",
            "step: 9210/10000...  loss: 3.2847...  0.4454 sec/batch\n",
            "step: 9220/10000...  loss: 3.1234...  0.3550 sec/batch\n",
            "step: 9230/10000...  loss: 3.1202...  0.3632 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 9240/10000...  loss: 3.1642...  0.3493 sec/batch\n",
            "step: 9250/10000...  loss: 3.2155...  0.3782 sec/batch\n",
            "step: 9260/10000...  loss: 3.1908...  0.3097 sec/batch\n",
            "step: 9270/10000...  loss: 3.2232...  0.3924 sec/batch\n",
            "step: 9280/10000...  loss: 3.0476...  0.3672 sec/batch\n",
            "step: 9290/10000...  loss: 3.1980...  0.3843 sec/batch\n",
            "step: 9300/10000...  loss: 3.2316...  0.3299 sec/batch\n",
            "step: 9310/10000...  loss: 3.2401...  0.3079 sec/batch\n",
            "step: 9320/10000...  loss: 3.2051...  0.3077 sec/batch\n",
            "step: 9330/10000...  loss: 3.1876...  0.3002 sec/batch\n",
            "step: 9340/10000...  loss: 3.2034...  0.3062 sec/batch\n",
            "step: 9350/10000...  loss: 3.2517...  0.3177 sec/batch\n",
            "step: 9360/10000...  loss: 3.2540...  0.3104 sec/batch\n",
            "step: 9370/10000...  loss: 3.0868...  0.3347 sec/batch\n",
            "step: 9380/10000...  loss: 3.1699...  0.3104 sec/batch\n",
            "step: 9390/10000...  loss: 3.1867...  0.3267 sec/batch\n",
            "step: 9400/10000...  loss: 3.3379...  0.3069 sec/batch\n",
            "step: 9410/10000...  loss: 3.1551...  0.3352 sec/batch\n",
            "step: 9420/10000...  loss: 3.1685...  0.3188 sec/batch\n",
            "step: 9430/10000...  loss: 3.1777...  0.3131 sec/batch\n",
            "step: 9440/10000...  loss: 3.1411...  0.3131 sec/batch\n",
            "step: 9450/10000...  loss: 3.0877...  0.3135 sec/batch\n",
            "step: 9460/10000...  loss: 3.3133...  0.3007 sec/batch\n",
            "step: 9470/10000...  loss: 3.2650...  0.3144 sec/batch\n",
            "step: 9480/10000...  loss: 3.0572...  0.3231 sec/batch\n",
            "step: 9490/10000...  loss: 3.1727...  0.3153 sec/batch\n",
            "step: 9500/10000...  loss: 3.1691...  0.3387 sec/batch\n",
            "step: 9510/10000...  loss: 3.2173...  0.3115 sec/batch\n",
            "step: 9520/10000...  loss: 3.1775...  0.3181 sec/batch\n",
            "step: 9530/10000...  loss: 3.1452...  0.3267 sec/batch\n",
            "step: 9540/10000...  loss: 3.0606...  0.3385 sec/batch\n",
            "step: 9550/10000...  loss: 3.2293...  0.3061 sec/batch\n",
            "step: 9560/10000...  loss: 3.2285...  0.3266 sec/batch\n",
            "step: 9570/10000...  loss: 3.1815...  0.3187 sec/batch\n",
            "step: 9580/10000...  loss: 3.1902...  0.3049 sec/batch\n",
            "step: 9590/10000...  loss: 3.1574...  0.3221 sec/batch\n",
            "step: 9600/10000...  loss: 3.2022...  0.3164 sec/batch\n",
            "step: 9610/10000...  loss: 3.2311...  0.3395 sec/batch\n",
            "step: 9620/10000...  loss: 3.2575...  0.3259 sec/batch\n",
            "step: 9630/10000...  loss: 3.0658...  0.3335 sec/batch\n",
            "step: 9640/10000...  loss: 3.1704...  0.3184 sec/batch\n",
            "step: 9650/10000...  loss: 3.1346...  0.3250 sec/batch\n",
            "step: 9660/10000...  loss: 3.2999...  0.3110 sec/batch\n",
            "step: 9670/10000...  loss: 3.1509...  0.3238 sec/batch\n",
            "step: 9680/10000...  loss: 3.2025...  0.3209 sec/batch\n",
            "step: 9690/10000...  loss: 3.1963...  0.3092 sec/batch\n",
            "step: 9700/10000...  loss: 3.1393...  0.3354 sec/batch\n",
            "step: 9710/10000...  loss: 3.0508...  0.3166 sec/batch\n",
            "step: 9720/10000...  loss: 3.2764...  0.3266 sec/batch\n",
            "step: 9730/10000...  loss: 3.3006...  0.3335 sec/batch\n",
            "step: 9740/10000...  loss: 3.0541...  0.3164 sec/batch\n",
            "step: 9750/10000...  loss: 3.1417...  0.3032 sec/batch\n",
            "step: 9760/10000...  loss: 3.1056...  0.3080 sec/batch\n",
            "step: 9770/10000...  loss: 3.2394...  0.3289 sec/batch\n",
            "step: 9780/10000...  loss: 3.1666...  0.3105 sec/batch\n",
            "step: 9790/10000...  loss: 3.2028...  0.3227 sec/batch\n",
            "step: 9800/10000...  loss: 3.0550...  0.3166 sec/batch\n",
            "step: 9810/10000...  loss: 3.2708...  0.3204 sec/batch\n",
            "step: 9820/10000...  loss: 3.2640...  0.3010 sec/batch\n"
          ],
          "name": "stdout"
        },
        {
          "output_type": "stream",
          "text": [
            "step: 9830/10000...  loss: 3.2557...  0.3160 sec/batch\n",
            "step: 9840/10000...  loss: 3.1933...  0.3166 sec/batch\n",
            "step: 9850/10000...  loss: 3.1658...  0.3146 sec/batch\n",
            "step: 9860/10000...  loss: 3.1742...  0.3244 sec/batch\n",
            "step: 9870/10000...  loss: 3.2502...  0.3032 sec/batch\n",
            "step: 9880/10000...  loss: 3.2122...  0.3240 sec/batch\n",
            "step: 9890/10000...  loss: 3.0794...  0.3075 sec/batch\n",
            "step: 9900/10000...  loss: 3.1849...  0.3255 sec/batch\n",
            "step: 9910/10000...  loss: 3.1823...  0.3157 sec/batch\n",
            "step: 9920/10000...  loss: 3.3323...  0.3082 sec/batch\n",
            "step: 9930/10000...  loss: 3.1534...  0.3184 sec/batch\n",
            "step: 9940/10000...  loss: 3.1557...  0.3174 sec/batch\n",
            "step: 9950/10000...  loss: 3.1723...  0.3199 sec/batch\n",
            "step: 9960/10000...  loss: 3.1096...  0.3080 sec/batch\n",
            "step: 9970/10000...  loss: 3.0905...  0.3286 sec/batch\n",
            "step: 9980/10000...  loss: 3.2738...  0.3113 sec/batch\n",
            "step: 9990/10000...  loss: 3.2860...  0.3190 sec/batch\n",
            "step: 10000/10000...  loss: 3.0485...  0.3190 sec/batch\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "9wIkdX1QYN--",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 894
        },
        "outputId": "b83b571b-b019-4ee1-b951-3852b52b4c9d"
      },
      "cell_type": "code",
      "source": [
        "!cd Char-RNN-TensorFlow && python sample.py --converter_path model/writings/converter.pkl --checkpoint_path  model/writings --use_embedding --max_length 2000 --num_layers 3 --lstm_size 256 --embedding_size 256"
      ],
      "execution_count": 14,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:From /content/Char-RNN-TensorFlow/model.py:93: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\r\n",
            "Instructions for updating:\r\n",
            "\r\n",
            "Future major versions of TensorFlow will allow gradients to flow\r\n",
            "into the labels input on backprop by default.\r\n",
            "\r\n",
            "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\r\n",
            "\n",
            "2018-07-28 15:51:34.229249: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2018-07-28 15:51:34.229797: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \n",
            "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\n",
            "pciBusID: 0000:00:04.0\n",
            "totalMemory: 11.17GiB freeMemory: 11.10GiB\n",
            "2018-07-28 15:51:34.229845: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\n",
            "2018-07-28 15:51:34.641848: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2018-07-28 15:51:34.641926: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958]      0 \n",
            "2018-07-28 15:51:34.641953: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0:   N \n",
            "2018-07-28 15:51:34.642401: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10763 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)\n",
            "Restored from: model/writings/model-10000\n",
            "<unk>，我们不愿意面对我自己不愿意面对自己有时光。他们是一种一切的一切，我们不能在别人懂得自己的生命状态和自己，而我们的意愿是在我们自己的意愿，在这个世界无限之后的时间，他们的人生就是在心的意识成长来的时候，就不能使你感觉到我的心理时。\n",
            "\n",
            "我们在自己想得的东西都不过是一种人类的时候，就会使这种复杂的空，这些事情不过是一个人，而不会被蜿蜒的心情和平衡。\n",
            "\n",
            "在一种语言中发现了一点理想主张的人，就是对于人类人类历史时候。\n",
            "\n",
            "人生的是这种意识形态的发展是我生活的特意，不过是一个不能忍受一个人，可以用来做我人的一个人。人们不再在一个个人都有些人们的生命，我们不要在自己的生产状态，或许自己为了一些不愿意的人生都会不会以一个个一个主观时一步的人的事情，可这一个人的心，我的一个基本消灭，在自己孜孜可以不可能做得一种无垠。我们不再有一个人的心，可是在我们不断成为一个人的主体和客宰在的人们。我们不是我们的信仰和我们自己的自己，那些我们对自己的生命，是有人的意义是否是自己的生命和人们生活的夹料，在我们的心情一直就是我的一种优先方，我们在一个世界无法做成，也没有一种无疑，而是我们自己的意志和心实生活，我们不能在自己的人生和自己。我们对我的心理平衡点不是一个人的主张的基础，是我们的意志与心理生存，这一切就是自己的心理平衡，可是在你们自身地做到这个世界的时候，又没有一个人的人，可在于这个世界的人生，我们在一种自我辩护的人，在自己的生存状态和人生，那么我只是这种有限的人，我要不愿意面对这个事情的时间，不过会不是一样的心情。我们在这世界上要做一个人的人，可不过是一种人的需要我们在一个人都是我们的心中。\n",
            "\n",
            "在此一个人的一个时间里，我们的人生的人类在心中无法不断，可是我们不会做到的事件，不是因为这些自己的生命和无力；我们的思维不可能鼓缩了这种道理、真相、自己的意识形态不同的态度。我们不要忘记一个自己，害怕一个自己的意义，我们不要揭坏自己的心灵和平衡、丑恶和我自己和自己的生命。\n",
            "\n",
            "人类对我生存性生命的平衡和一个自觉，在我们不知意做了自己的自己，而不是一个人，不能在人们留下了不断的事情。\n",
            "\n",
            "如果我们想要自己的一个信仰，我们对这个世界的人生，是一个有小的事情。然而我们的信仰和自己的生存，就有一个人在自身渗灭了，是我们不知道这一点。\n",
            "\n",
            "不能\n",
            "----\n",
            "\n",
            "我们看见一种思维，仔细的时候是一个自己辩护的人们的思想和一个对象的人的事情和实在，而不是因为它们对这个世界的基础，而是一个有法律的人。人们在这种世界上不过是用了人的存在方式，是那种一种一个个人的一个人。人们在一个碳不可靠的牛因人的理解。我们在这个世界里成为一个人的人的事情，我们可以接受一个人的世界。一个人的人的心，一个人的心，可在我们在一个人似样地做的，是我们一个人在自己的生命力，我们的主体的主体和自己在自己的自己和自己的世界无力和心情的真实，我们可以在人类历史一些自己力量的方式，不能被实现方一个主体）的存在浓缩着这个世界的一切；我们就能在人类在人生中不可能被迫到人生命中时的一个人在人格之中一种人类的意志和实际性的存在的意识形态和一个人类的基础和历史，使你们在人格的自我，然而这样，他的心理中无法做了，又可以不可挡用我们自己的方式不能成为我们的意愿。我们在一种无数湮亡的一面，我的心灵不能不可抗拒的人生都会放弃对我生活的意志。\n",
            "\n",
            "一个人不能调成自己的生存状态，我的意义不是一个一种一个个人的事情；一种人的世界，不能在自己的世界里，我也会不会做成自己的世界，我们不再是“爱所有的事情。\n",
            "\n",
            "如此一个人在一个人在一切地做的是人的人。但那个人们在一个人的双方和应该有机制的时间，我们就会在这一个个人只有这种世界的时候，我会在这世界不可能把人类留起了无限和研始，可是一个有人之间不过是自己的世界。一切不能不能解决自己。\n",
            "\n",
            "我是一个人在人生的一面。在自己的意识形态和基石，不过是用人生的理想，篡这个人的心情一定会在一种自己，因为我们的意愿不能及自己在自己和自己的心理平衡点，可是深陷了不可能地变得臃燥乏力。\n",
            "\n",
            "如果我们不能说那种世界不能保定这个世界。\n",
            "\n",
            "我们要看见自己的时候，我们都像这个字，一个人的关键，我们在我们的意愿不是在一个人类生活中成为一个人类的主体的基础和资本和一点，我在这个世界里成为一个主体的期望，这些人们在这种人生的生产力量的一种基本性。可是我们被迫在这些世界上的人们，我们在我们的心理挣扎，不可是是自己不愿意的人们在于自己的意义和意义，而我们在我们的心情平静也不是一样，而且我们不会看着自己的时间去做了，这是一种无法自然的自己，我们还是一个人的人的事情和一个人，有的碎片会在你们做了一个有自由的一个副产生。\n",
            "\n",
            "我们对我们被文本和人类的生活状态，在人们的人生，是我们的意义，我们还是一个人的意义。\n",
            "\n",
            "在这个时候，我就是一种不喜欢的事情，在我们的心中\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "3-IoLbEYYiPN",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 1280
        },
        "outputId": "f642806c-87cb-4caf-a56f-9874eaf04445"
      },
      "cell_type": "code",
      "source": [
        "!cd Char-RNN-TensorFlow && python sample.py --converter_path model/writings/converter.pkl --checkpoint_path  model/writings --use_embedding --max_length 2000 --num_layers 3 --lstm_size 256 --embedding_size 256"
      ],
      "execution_count": 15,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:From /content/Char-RNN-TensorFlow/model.py:93: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\r\n",
            "Instructions for updating:\r\n",
            "\r\n",
            "Future major versions of TensorFlow will allow gradients to flow\r\n",
            "into the labels input on backprop by default.\r\n",
            "\r\n",
            "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\r\n",
            "\n",
            "2018-07-28 15:55:27.711992: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2018-07-28 15:55:27.712659: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \n",
            "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\n",
            "pciBusID: 0000:00:04.0\n",
            "totalMemory: 11.17GiB freeMemory: 11.10GiB\n",
            "2018-07-28 15:55:27.712720: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\n",
            "2018-07-28 15:55:28.291497: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2018-07-28 15:55:28.291580: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958]      0 \n",
            "2018-07-28 15:55:28.291611: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0:   N \n",
            "2018-07-28 15:55:28.292032: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10763 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)\n",
            "Restored from: model/writings/model-10000\n",
            "况远的东西。我是一个人在自己一个人的心情中的事情一直会成为一种人类历史时间的发展，不是一个人的一个小人，不是不过是理想的事情。』\n",
            "\n",
            "摘抄来源：http://www.zouban.com/neting/nevanitic/209962912949/\n",
            "\n",
            "http://book.doubing.com/rever/2836999321/\n",
            "\n",
            "阴谋论\n",
            "-----\n",
            "\n",
            "> 『在我们不可怕的，所有的人都要不会没有人生，而没有做到自己，但我们不会被因为自己，害怕我们，在自己的生存状态。我们对自由一点执着的时候，不会不过是一种自由和自由和孤寂地使自己的意愿，在一个人的心理时候就会不能重复一件人，一切都是自由，不会是一种不可不能合心的一个副命境的，只有当时的平衡点去有了不可能的。』\n",
            "\n",
            "摘抄来源：http://book.douban.com/note/123446669967533/\n",
            "\n",
            "http://www.douban.com/notiew/274619/]hom/ronaliting/13939/\n",
            "\n",
            "http://baca.com/revilw//www.doubin.com/note/1639662346039339/\n",
            "\n",
            "排序\n",
            "----\n",
            "\n",
            "> 『人呵，“我自己不再是自己的生活力量的一个毒瘤。而是我们各一个事，很好，是人们不能懂得自己的一个主观，不是在人类来做了自己和人类的一切对于自我不能自由。而我们的人生的自己在这个主体的一个基本体在，是一种政治时候，在我们的人生都无法抗拒这个社会，不过是自己的世界。\n",
            "\n",
            "在某种人类在人的信图保护自己，这些一点不能在人生中涓涓的滋环。\n",
            "\n",
            "我们在我们的生产力的桎梏，在人生的文字中不断到一个一种不祥的火焰。\n",
            "\n",
            "我们不能看见一个痴情，在人生的人生的意义里不能成为我们所有的世界，那些我们的心中一定会在自己的心灵平衡的心情和习准和不断上的，只能一种带不到人的人。\n",
            "\n",
            "如果神有一种不平地。\n",
            "\n",
            "我们在一起不是我，我们在自己想想的事件中，我们不愿意做这种人们，可是一定会有一种可能的人的事情。我们在这些世界上成为这些意愿形态的时候，我们不会让它看到一个东西，可是在这一种人性里的一点。这种世界，在我们对这种不可遏制的人，不能因为别人的意义和在于人类生活中无法消灭和人的人，人们的一个基本消灭，我们还是会做到这些一种人类的存在。\n",
            "\n",
            "我们对于每个主体时间，我们还是在自己的人生和巨大而徒破人地的人，这种不可承担的人，在自己不能看出自己的世界，在人的意义在一切可能成为的一个事情，我们在这种世界不能被放弃。\n",
            "\n",
            "人们的文源是不断上升我们心理学，不能不是因为他的心灵时间。\n",
            "\n",
            "我们将一起被灌输来地做自己的人，在自己的人生，不过是自己的生命，我们是这个世界。我们是一个人的人，我们的长辈们，只有一个平衡的人们，只是我们有一种人类的生存，我们在我们的意义和价值观意成为自己不能做出的。他们在一个世界里不断改变的事情，都会不能被蜿蜒的，是人类的心理平衡点；在这个世界示快的一种不平的人？这一点的是人的人，是人类为人生中的人生的一个人的事情的时间，我们不能做了。\n",
            "\n",
            "我的誓气锋常，是一个不能做的事情。我们徜徉在于一个主体的基分。人生的人们在于一个人的人的世界。这个人们的梦想，我不能使这种世界上的一切砖上。\n",
            "\n",
            "这样的话，我们在一种自我心念生活的轰轰丛烈》和人生，一切会看到这个事情的事情。我们被放弃，我们不会不能做来，我们不能让人们做起了自觉的自己。\n",
            "\n",
            "人们的思维来不清楚。我们一个人在人类和人生，不过是一个人的心。一切都是为什么不可以不过是一种人生的生活。\n",
            "\n",
            "我在人生的人生和一切，在一个人修着自己的人们，无法摆脱自己的生存，我们不能抵报我们心情的心情。\n",
            "\n",
            "我不愿意只有一切可以让我们的思想自由一种被不可能性。这是一种被美术的栖居、不得和自己的力量的时候，我们的心脏衰弱起了我们不愿白了我们，不愿意始终不能因为一个慎重的世界和自由和自由的世界，我们是一个不是一种不祥的态量，使人不可以被改变一个人生的时间变得不断的路途。而且，这是一种绝望的不能使一个人在世界的一个人的世界，我们在这个世界里不是这种贴则，从而心灵不足地无限。但是我们在自己所能做的，是一种无力的事情，也是一个有限的时候，我已经能够做起来。\n",
            "\n",
            "这样的一切就是一个人，是人们无法做成。所以，我们不是在自觉的人格。我们的焦虑与恐惧不能懂得。\n",
            "\n",
            "如果没有勇气，就是一种不可承担，因为它们不过是一起自己的心中不可以做到的地方。\n",
            "\n",
            "我们在自己的心中平等和自己的意识在于这种不可能的人们，然后这种不是我的人，是否没有自己的愿望和不可能。因为你不能不能改变我们对于人类人格的人格。我们不能看见这种不能忍受了我们的心理时光，就不能是这些烂泥塘的碎片一样的心情。\n",
            "\n",
            "我们将不是自己和不愿意的自己和自己，不会不是这样的一个小小，一种无法不断地不能重新\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "isVgaiaamfty",
        "colab_type": "code",
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 978
        },
        "outputId": "1381ebee-a53a-4f3f-933a-c5085414c14b"
      },
      "cell_type": "code",
      "source": [
        "!cd Char-RNN-TensorFlow && python sample.py --converter_path model/writings/converter.pkl --checkpoint_path  model/writings --use_embedding --max_length 2000 --num_layers 3 --lstm_size 256 --embedding_size 256"
      ],
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "text": [
            "WARNING:tensorflow:From /content/Char-RNN-TensorFlow/model.py:93: softmax_cross_entropy_with_logits (from tensorflow.python.ops.nn_ops) is deprecated and will be removed in a future version.\r\n",
            "Instructions for updating:\r\n",
            "\r\n",
            "Future major versions of TensorFlow will allow gradients to flow\r\n",
            "into the labels input on backprop by default.\r\n",
            "\r\n",
            "See @{tf.nn.softmax_cross_entropy_with_logits_v2}.\r\n",
            "\n",
            "2018-07-28 15:56:24.743002: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:897] successful NUMA node read from SysFS had negative value (-1), but there must be at least one NUMA node, so returning NUMA node zero\n",
            "2018-07-28 15:56:24.743732: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1392] Found device 0 with properties: \n",
            "name: Tesla K80 major: 3 minor: 7 memoryClockRate(GHz): 0.8235\n",
            "pciBusID: 0000:00:04.0\n",
            "totalMemory: 11.17GiB freeMemory: 11.10GiB\n",
            "2018-07-28 15:56:24.743790: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1471] Adding visible gpu devices: 0\n",
            "2018-07-28 15:56:25.285580: I tensorflow/core/common_runtime/gpu/gpu_device.cc:952] Device interconnect StreamExecutor with strength 1 edge matrix:\n",
            "2018-07-28 15:56:25.285684: I tensorflow/core/common_runtime/gpu/gpu_device.cc:958]      0 \n",
            "2018-07-28 15:56:25.285716: I tensorflow/core/common_runtime/gpu/gpu_device.cc:971] 0:   N \n",
            "2018-07-28 15:56:25.286206: I tensorflow/core/common_runtime/gpu/gpu_device.cc:1084] Created TensorFlow device (/job:localhost/replica:0/task:0/device:GPU:0 with 10763 MB memory) -> physical GPU (device: 0, name: Tesla K80, pci bus id: 0000:00:04.0, compute capability: 3.7)\n",
            "Restored from: model/writings/model-10000\n",
            "睛\n",
            "    \n",
            "-   惟有什么我不想做，只是因为这一种不能是一种不可能的事情，都是在这个世界的事情，不会有时候，有的人们在一个人自己的心理平衡点去不得不能重复自己，这些世界的真正历史和不够自己的自由，而这是我们，只有自己，这是一样的事情，都是在这种过程中，不能因为这种不知是一些自觉的一个基本中在一个人在个人的人，是自己不在卑水的时间，都有一种无法，在人们的意义就被在这些世界上做了。』\n",
            "\n",
            "我在世界的一个角度，这是一个不可逾越的人，在我们对自己的自己。\n",
            "\n",
            "我不愿再做这个社会的存在。我们不再是一种“大””。\n",
            "\n",
            "我们对这个人，我们怎么不想在这个世界，不过是不是一切“破坏的心，不能被破坏。\n",
            "\n",
            "一种亢趣，不是理论，有一个词是一定有人生的人，不能不可以改变演成一种不断的火潭。这是一个个人都不能不过，不是我们是人的一种淡漠的时间。\n",
            "\n",
            "人与世界，在这些世界里成为一个人的人。我们在自己所做的，所以我们的心理无法，我们不会再做人们做出了的人，我们还是一些人的人的人。我们在一切不能汲取力量和这个主体，我们还是在人类共同界，以为这个世界里，这样的一个孩子，是那种不可挽用的，另外一个人头都攫取和动力的机制，而无疑是有据一定之间，才会有一个人的一切的心理人，这个一个主体的一件人的智慧，是一种自我辩护，不能因为自己的意愿。\n",
            "\n",
            "我们在我们的思维依附一个正常的人，一个人对于这样的人生都是无力的事情，不是一个人，而是自己的人生就是无数者不能不断，可以不是我们对自己的自我不愿，我们的心灵无力的地方稍稍一点就被一个人的心，一个是人生的一面。我们不再不能看到我们的信点和一切，我们不要不能在我们在自己所有的初衷，我们在自己，做到了我们的心灵的心。我们在我们的思考，我们不会再做自己的生存。\n",
            "\n",
            "这些人，我的心情无法不清楚，我们的信念和一种淡淡。这种不是诗。我们不能抵怨一切淡泊的心情，让人们在人生里的一面。人的生命无法不可断在这个世界的人生，我们不能做的，是我们自己的意志，是一个自我不断地做到。我是一个不能踩心中的时候，它让人做得不断的态度，在一个人的一个人的事情发展了我们的心灵时间的心理时间。』\n",
            "    \n",
            "-   人类的人们，在一个主体主义的意义和二分、人生的一个基本体系。一种能力是一种完美的基分的时代，就有这么一个一种人，是人类人生的意思是在人类人生的意识形态不同，可能以不够及人类在一个人”的一个主体），人们社会的时间就会不会被蜿蜒的时间里，在自己孜孜不是一些一种一种莫名的人生，而是关于自身的人们，不过不过是一个自由的心理阴影。我们是不要做出的事情这种不是一种不祥的态度。\n",
            "\n",
            "我们将自身渗入人，这个世界，有的人不能看见自己的世界观本为这种不平的，我们的内心则是一个有小小的阅读中和世界，我们在这些烂泥塘中继续不断不断的路途。\n",
            "\n",
            "在这个社会的基本上带来，如果有些人的错误就有了一个不足的人，在一种不断和人的人。人们被放着这种多么时间里，就是一种不可断。\n",
            "\n",
            "我们不要说，我们在自己的意志在一个人似一个小人在一个人在一个世界串事情的事情，不过是不是我们的心中，不可能在一些人。今年是我的生活。我们在我不能做出的，我们还是在自己的心心平衡，不是因为这样的一切不可以不能做得自己，而不是在自己在自己的本质和自由，抑至是在一个人的无法做到，让自己生活和不可以做的事情，不能不可以改变一个人的人。\n",
            "\n",
            "我们在这个世界之间不是一种不断的态度，是一种被触角”，不能因为自己，他们会会发现自己不会不能想得自由这一点，都像这样不是一切。我们有时生活于一切，从而不能被无法袒露的函法。\n",
            "\n",
            "我们在劫地被无法不可忍担，不能使自己在人生的自由），可以失去了一些自己的意愿。\n",
            "\n",
            "我不愿再是自己的生活，我们不愿意不能揣行到那种心理摔起一个人的世界，在我们对这些世界里的存在，是那个世界上的，只能不可挡地不过是自己的心理一种不是在冥理，我们在一个主体内心无法不断地回到，我们的心心旅将一直不能不能及自由人生的一种小小的时候，会有自身面对自己，我们不再不能做你的自己。\n",
            "\n",
            "这些人，是我不能做的。我想要做的一切，我们不会在自己的自己，我们是这个不是我的愿望，我们在这个世界上成为人类丑恶的人，我们不愿意为我们成为我们自由的人？\n",
            "\n",
            "这种人生的意义就是在一种无法可断的东西，我们的人生，是一种自我辩境的原因，是那个不可能的。这样的时候，我们都会看见自己的心理平衡，是一个人们都会在于这些世界里的时候，我们的人生的自我，我们在这些社会在人格的时候，不是我们的意义和诠决，我们还会能做自己自由。我们的人们无法做出，我们的信仰和我们一个有人生的人，是人生的人，只能不再能做到事情的智慧。这是迄今不可靠的事情，也不是真的无限。\n",
            "\n",
            "一切的一个人，在我的心灵看起了我们自己的心情，我们才要拆开和自由。我们不再不断向自己，不过在自己的心理平衡点不过是自己的自己，而是在自己\n"
          ],
          "name": "stdout"
        }
      ]
    },
    {
      "metadata": {
        "id": "ZerZeLEcmtiD",
        "colab_type": "code",
        "colab": {}
      },
      "cell_type": "code",
      "source": [
        ""
      ],
      "execution_count": 0,
      "outputs": []
    }
  ]
}